Repository: incubator-hivemall Updated Branches: refs/heads/master ad15923a1 -> 7b9e6bae6
[HOTFIX] Reverted to support Java 7 Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/7b9e6bae Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/7b9e6bae Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/7b9e6bae Branch: refs/heads/master Commit: 7b9e6bae691e746521242d0cb71270ede648e0f3 Parents: ad15923 Author: Makoto Yui <[email protected]> Authored: Thu Oct 26 20:54:32 2017 +0900 Committer: Makoto Yui <[email protected]> Committed: Thu Oct 26 20:54:32 2017 +0900 ---------------------------------------------------------------------- .travis.yml | 2 +- core/pom.xml | 2 +- .../KernelExpansionPassiveAggressiveUDTF.java | 10 ++- .../hivemall/fm/FactorizationMachineUDTF.java | 37 ++++---- .../fm/FieldAwareFactorizationMachineUDTF.java | 18 ++-- .../math/matrix/ColumnMajorFloatMatrix.java | 19 ++++ .../java/hivemall/math/matrix/FloatMatrix.java | 9 +- .../math/matrix/RowMajorFloatMatrix.java | 19 ++++ .../builders/ColumnMajorDenseMatrixBuilder.java | 7 +- .../matrix/sparse/floats/DoKFloatMatrix.java | 24 +++++- .../main/java/hivemall/recommend/SlimUDTF.java | 91 ++++++++++---------- .../hivemall/utils/collections/Fastutil.java | 89 +++++++++++++++++++ .../java/hivemall/utils/lambda/Throwing.java | 46 ---------- .../hivemall/utils/lambda/ThrowingConsumer.java | 37 -------- .../hivemall/utils/lambda/ThrowingTest.java | 66 -------------- docs/gitbook/getting_started/installation.md | 2 +- pom.xml | 8 +- 17 files changed, 242 insertions(+), 244 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/.travis.yml ---------------------------------------------------------------------- diff --git a/.travis.yml b/.travis.yml index c98fe0c..ac6092c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,7 +18,7 @@ env: language: java jdk: -# - openjdk7 + - openjdk7 # - oraclejdk7 - oraclejdk8 # - oraclejdk9 http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/pom.xml ---------------------------------------------------------------------- diff --git a/core/pom.xml b/core/pom.xml index b440946..838677c 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -145,7 +145,7 @@ <dependency> <groupId>it.unimi.dsi</groupId> <artifactId>fastutil</artifactId> - <version>[8.1.0,8.2)</version> + <version>[7.2.1,7.3)</version> <scope>compile</scope> </dependency> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTF.java b/core/src/main/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTF.java index c3a1371..4ecc028 100644 --- a/core/src/main/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTF.java +++ b/core/src/main/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTF.java @@ -24,10 +24,10 @@ import hivemall.model.FeatureValue; import hivemall.model.PredictionModel; import hivemall.model.PredictionResult; import hivemall.optimizer.LossFunctions; +import hivemall.utils.collections.Fastutil; import hivemall.utils.hashing.HashFunction; import hivemall.utils.lang.Preconditions; import it.unimi.dsi.fastutil.ints.Int2FloatMap; -import it.unimi.dsi.fastutil.ints.Int2FloatMaps; import it.unimi.dsi.fastutil.ints.Int2FloatOpenHashMap; import java.util.ArrayList; @@ -116,7 +116,8 @@ public final class KernelExpansionPassiveAggressiveUDTF extends BinaryOnlineClas if (c_str != null) { c = Float.parseFloat(c_str); if (c <= 0.f) { - throw new UDFArgumentException("Aggressiveness parameter C must be C > 0: " + c); + throw new UDFArgumentException( + "Aggressiveness parameter C must be C > 0: " + c); } } algo = cl.getOptionValue("algo", algo); @@ -353,7 +354,7 @@ public final class KernelExpansionPassiveAggressiveUDTF extends BinaryOnlineClas row[2] = w1; row[3] = w2; final Int2FloatMap w2map = _w2; - for (Int2FloatMap.Entry e : Int2FloatMaps.fastIterable(_w1)) { + for (Int2FloatMap.Entry e : Fastutil.fastIterable(_w1)) { int k = e.getIntKey(); Preconditions.checkArgument(k > 0, HiveException.class); h.set(k); @@ -370,7 +371,8 @@ public final class KernelExpansionPassiveAggressiveUDTF extends BinaryOnlineClas row[4] = hk; row[5] = w3; - for (Int2FloatMap.Entry e : Int2FloatMaps.fastIterable(_w3)) { + _w3.int2FloatEntrySet(); + for (Int2FloatMap.Entry e : Fastutil.fastIterable(_w3)) { int k = e.getIntKey(); Preconditions.checkArgument(k > 0, HiveException.class); hk.set(k); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java b/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java index bca1365..4eb740b 100644 --- a/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java +++ b/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java @@ -26,13 +26,13 @@ import hivemall.optimizer.EtaEstimator; import hivemall.optimizer.LossFunctions; import hivemall.optimizer.LossFunctions.LossFunction; import hivemall.optimizer.LossFunctions.LossType; +import hivemall.utils.collections.Fastutil; import hivemall.utils.hadoop.HiveUtils; import hivemall.utils.io.FileUtils; import hivemall.utils.io.NioStatefullSegment; import hivemall.utils.lang.NumberUtils; import hivemall.utils.lang.SizeOf; import hivemall.utils.math.MathUtils; -import it.unimi.dsi.fastutil.objects.Object2ObjectMaps; import java.io.File; import java.io.IOException; @@ -65,8 +65,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.Counters.Counter; import org.apache.hadoop.mapred.Reporter; -@Description( - name = "train_fm", +@Description(name = "train_fm", value = "_FUNC_(array<string> x, double y [, const string options]) - Returns a prediction model") public class FactorizationMachineUDTF extends UDTFWithOptions { private static final Log LOG = LogFactory.getLog(FactorizationMachineUDTF.class); @@ -204,10 +203,9 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { @Override public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 2 && argOIs.length != 3) { - throw new UDFArgumentException( - getClass().getSimpleName() - + " takes 2 or 3 arguments: array<string> x, double y [, CONSTANT STRING options]: " - + Arrays.toString(argOIs)); + throw new UDFArgumentException(getClass().getSimpleName() + + " takes 2 or 3 arguments: array<string> x, double y [, CONSTANT STRING options]: " + + Arrays.toString(argOIs)); } this._xOI = HiveUtils.asListOI(argOIs[0]); HiveUtils.validateFeatureOI(_xOI.getListElementObjectInspector()); @@ -244,7 +242,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { fieldNames.add("W_i"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableFloatObjectInspector); fieldNames.add("V_if"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } @@ -311,8 +310,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { file = File.createTempFile("hivemall_fm", ".sgmt"); file.deleteOnExit(); if (!file.canWrite()) { - throw new UDFArgumentException("Cannot write a temporary file: " - + file.getAbsolutePath()); + throw new UDFArgumentException( + "Cannot write a temporary file: " + file.getAbsolutePath()); } LOG.info("Record training examples to a file: " + file.getAbsolutePath()); } catch (IOException ioe) { @@ -516,7 +515,7 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { // Wi, Vif (i starts from 1..P) forwardObjs[2] = Arrays.asList(f_Vi); - for (Map.Entry<String, Entry> e : Object2ObjectMaps.fastIterable(model.getMap())) { + for (Map.Entry<String, Entry> e : Fastutil.fastIterable(model.getMap())) { String i = e.getKey(); assert (i != null); // set i @@ -543,8 +542,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { final boolean adaregr = _va_rand != null; final Reporter reporter = getReporter(); - final Counter iterCounter = (reporter == null) ? null : reporter.getCounter( - "hivemall.fm.FactorizationMachines$Counter", "iteration"); + final Counter iterCounter = (reporter == null) ? null + : reporter.getCounter("hivemall.fm.FactorizationMachines$Counter", "iteration"); try { if (fileIO.getPosition() == 0L) {// run iterations w/o temporary file @@ -589,8 +588,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { try { fileIO.flush(); } catch (IOException e) { - throw new HiveException("Failed to flush a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to flush a file: " + fileIO.getFile().getAbsolutePath(), e); } if (LOG.isInfoEnabled()) { File tmpFile = fileIO.getFile(); @@ -615,8 +614,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { try { bytesRead = fileIO.read(inputBuf); } catch (IOException e) { - throw new HiveException("Failed to read a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to read a file: " + fileIO.getFile().getAbsolutePath(), e); } if (bytesRead == 0) { // reached file EOF break; @@ -667,8 +666,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { try { fileIO.close(true); } catch (IOException e) { - throw new HiveException("Failed to close a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to close a file: " + fileIO.getFile().getAbsolutePath(), e); } this._inputBuf = null; this._fileIO = null; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java b/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java index d602fd6..610fa3d 100644 --- a/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java +++ b/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java @@ -19,13 +19,13 @@ package hivemall.fm; import hivemall.fm.FMHyperParameters.FFMHyperParameters; +import hivemall.utils.collections.Fastutil; import hivemall.utils.collections.arrays.DoubleArray3D; import hivemall.utils.collections.lists.IntArrayList; import hivemall.utils.hadoop.HadoopUtils; import hivemall.utils.hadoop.HiveUtils; import hivemall.utils.math.MathUtils; import it.unimi.dsi.fastutil.ints.Int2LongMap; -import it.unimi.dsi.fastutil.ints.Int2LongMaps; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -56,8 +56,7 @@ import org.apache.hadoop.io.Text; * @link https://www.csie.ntu.edu.tw/~cjlin/libffm/ * @since v0.5-rc.1 */ -@Description( - name = "train_ffm", +@Description(name = "train_ffm", value = "_FUNC_(array<string> x, double y [, const string options]) - Returns a prediction model") public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachineUDTF { private static final Log LOG = LogFactory.getLog(FieldAwareFactorizationMachineUDTF.class); @@ -86,7 +85,8 @@ public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachi Options opts = super.getOptions(); opts.addOption("w0", "global_bias", false, "Whether to include global bias term w0 [default: OFF]"); - opts.addOption("disable_wi", "no_coeff", false, "Not to include linear term [default: OFF]"); + opts.addOption("disable_wi", "no_coeff", false, + "Not to include linear term [default: OFF]"); // feature hashing opts.addOption("feature_hashing", true, "The number of bits for feature hashing in range [18,31] [default: -1]. No feature hashing for -1."); @@ -101,10 +101,7 @@ public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachi "Alpha value (learning rate) of Follow-The-Regularized-Reader [default: 0.2]"); opts.addOption("beta", "betaFTRL", true, "Beta value (a learning smoothing parameter) of Follow-The-Regularized-Reader [default: 1.0]"); - opts.addOption( - "l1", - "lambda1", - true, + opts.addOption("l1", "lambda1", true, "L1 regularization value of Follow-The-Regularized-Reader that controls model Sparseness [default: 0.001]"); opts.addOption("l2", "lambda2", true, "L2 regularization value of Follow-The-Regularized-Reader [default: 0.0001]"); @@ -157,7 +154,8 @@ public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachi fieldOIs.add(PrimitiveObjectInspectorFactory.writableFloatObjectInspector); fieldNames.add("Vi"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } @@ -297,7 +295,7 @@ public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachi final Entry entryV = new Entry(_ffmModel._buf, _ffmModel._factor); final float[] Vf = new float[factors]; - for (Int2LongMap.Entry e : Int2LongMaps.fastIterable(_ffmModel._map)) { + for (Int2LongMap.Entry e : Fastutil.fastIterable(_ffmModel._map)) { // set i final int i = e.getIntKey(); idx.set(i); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/math/matrix/ColumnMajorFloatMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/matrix/ColumnMajorFloatMatrix.java b/core/src/main/java/hivemall/math/matrix/ColumnMajorFloatMatrix.java index 6067ed3..c63febc 100644 --- a/core/src/main/java/hivemall/math/matrix/ColumnMajorFloatMatrix.java +++ b/core/src/main/java/hivemall/math/matrix/ColumnMajorFloatMatrix.java @@ -18,6 +18,8 @@ */ package hivemall.math.matrix; +import javax.annotation.Nonnegative; + public abstract class ColumnMajorFloatMatrix extends ColumnMajorMatrix implements FloatMatrix { public ColumnMajorFloatMatrix() { @@ -29,4 +31,21 @@ public abstract class ColumnMajorFloatMatrix extends ColumnMajorMatrix implement return this; } + @Override + public double get(@Nonnegative final int row, @Nonnegative final int col, + final double defaultValue) { + return get(row, col, (float) defaultValue); + } + + @Override + public void set(@Nonnegative final int row, @Nonnegative final int col, final double value) { + set(row, col, (float) value); + } + + @Override + public double getAndSet(@Nonnegative final int row, @Nonnegative final int col, + final double value) { + return getAndSet(row, col, (float) value); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/math/matrix/FloatMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/matrix/FloatMatrix.java b/core/src/main/java/hivemall/math/matrix/FloatMatrix.java index 79c34df..afc54a0 100644 --- a/core/src/main/java/hivemall/math/matrix/FloatMatrix.java +++ b/core/src/main/java/hivemall/math/matrix/FloatMatrix.java @@ -39,7 +39,8 @@ public interface FloatMatrix extends Matrix { * @throws IndexOutOfBoundsException * @throws UnsupportedOperationException */ - public float getAndSet(@Nonnegative final int row, @Nonnegative final int col, final float value); + public float getAndSet(@Nonnegative final int row, @Nonnegative final int col, + final float value); /** * @return returns dst @@ -47,22 +48,24 @@ public interface FloatMatrix extends Matrix { @Nonnull public float[] getRow(@Nonnegative int index, @Nonnull float[] dst); + /* @Override default double get(@Nonnegative final int row, @Nonnegative final int col, final double defaultValue) { return get(row, col, (float) defaultValue); } - + @Override default void set(@Nonnegative final int row, @Nonnegative final int col, final double value) { set(row, col, (float) value); } - + @Override default double getAndSet(@Nonnegative final int row, @Nonnegative final int col, final double value) { return getAndSet(row, col, (float) value); } + */ @Override public RowMajorFloatMatrix toRowMajorMatrix(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/math/matrix/RowMajorFloatMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/matrix/RowMajorFloatMatrix.java b/core/src/main/java/hivemall/math/matrix/RowMajorFloatMatrix.java index 90f7bbf..7cfd98f 100644 --- a/core/src/main/java/hivemall/math/matrix/RowMajorFloatMatrix.java +++ b/core/src/main/java/hivemall/math/matrix/RowMajorFloatMatrix.java @@ -18,6 +18,8 @@ */ package hivemall.math.matrix; +import javax.annotation.Nonnegative; + public abstract class RowMajorFloatMatrix extends RowMajorMatrix implements FloatMatrix { public RowMajorFloatMatrix() { @@ -29,4 +31,21 @@ public abstract class RowMajorFloatMatrix extends RowMajorMatrix implements Floa return this; } + @Override + public double get(@Nonnegative final int row, @Nonnegative final int col, + final double defaultValue) { + return get(row, col, (float) defaultValue); + } + + @Override + public void set(@Nonnegative final int row, @Nonnegative final int col, final double value) { + set(row, col, (float) value); + } + + @Override + public double getAndSet(@Nonnegative final int row, @Nonnegative final int col, + final double value) { + return getAndSet(row, col, (float) value); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/math/matrix/builders/ColumnMajorDenseMatrixBuilder.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/matrix/builders/ColumnMajorDenseMatrixBuilder.java b/core/src/main/java/hivemall/math/matrix/builders/ColumnMajorDenseMatrixBuilder.java index 152ac02..b830219 100644 --- a/core/src/main/java/hivemall/math/matrix/builders/ColumnMajorDenseMatrixBuilder.java +++ b/core/src/main/java/hivemall/math/matrix/builders/ColumnMajorDenseMatrixBuilder.java @@ -19,9 +19,9 @@ package hivemall.math.matrix.builders; import hivemall.math.matrix.dense.ColumnMajorDenseMatrix2d; +import hivemall.utils.collections.Fastutil; import hivemall.utils.collections.arrays.SparseDoubleArray; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectMaps; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import javax.annotation.Nonnegative; @@ -49,7 +49,8 @@ public final class ColumnMajorDenseMatrixBuilder extends MatrixBuilder { } @Override - public ColumnMajorDenseMatrixBuilder nextColumn(@Nonnegative final int col, final double value) { + public ColumnMajorDenseMatrixBuilder nextColumn(@Nonnegative final int col, + final double value) { if (value == 0.d) { return this; } @@ -69,7 +70,7 @@ public final class ColumnMajorDenseMatrixBuilder extends MatrixBuilder { public ColumnMajorDenseMatrix2d buildMatrix() { final double[][] data = new double[maxNumColumns][]; - for (Int2ObjectMap.Entry<SparseDoubleArray> e : Int2ObjectMaps.fastIterable(col2rows)) { + for (Int2ObjectMap.Entry<SparseDoubleArray> e : Fastutil.fastIterable(col2rows)) { int col = e.getIntKey(); SparseDoubleArray rows = e.getValue(); data[col] = rows.toArray(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/math/matrix/sparse/floats/DoKFloatMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/matrix/sparse/floats/DoKFloatMatrix.java b/core/src/main/java/hivemall/math/matrix/sparse/floats/DoKFloatMatrix.java index 10929fb..36b8d7a 100644 --- a/core/src/main/java/hivemall/math/matrix/sparse/floats/DoKFloatMatrix.java +++ b/core/src/main/java/hivemall/math/matrix/sparse/floats/DoKFloatMatrix.java @@ -61,8 +61,8 @@ public final class DoKFloatMatrix extends AbstractMatrix implements FloatMatrix public DoKFloatMatrix(@Nonnegative int numRows, @Nonnegative int numCols, @Nonnegative float sparsity) { super(); - Preconditions.checkArgument(sparsity >= 0.f && sparsity <= 1.f, "Invalid Sparsity value: " - + sparsity); + Preconditions.checkArgument(sparsity >= 0.f && sparsity <= 1.f, + "Invalid Sparsity value: " + sparsity); int initialCapacity = Math.max(16384, Math.round(numRows * numCols * sparsity)); this.elements = new Long2FloatOpenHashTable(initialCapacity); elements.defaultReturnValue(0.f); @@ -205,7 +205,8 @@ public final class DoKFloatMatrix extends AbstractMatrix implements FloatMatrix } @Override - public float getAndSet(@Nonnegative final int row, @Nonnegative final int col, final float value) { + public float getAndSet(@Nonnegative final int row, @Nonnegative final int col, + final float value) { checkIndex(row, col); final long index = index(row, col); @@ -398,4 +399,21 @@ public final class DoKFloatMatrix extends AbstractMatrix implements FloatMatrix return Primitives.toLong(row, col); } + @Override + public double get(@Nonnegative final int row, @Nonnegative final int col, + final double defaultValue) { + return get(row, col, (float) defaultValue); + } + + @Override + public void set(@Nonnegative final int row, @Nonnegative final int col, final double value) { + set(row, col, (float) value); + } + + @Override + public double getAndSet(@Nonnegative final int row, @Nonnegative final int col, + final double value) { + return getAndSet(row, col, (float) value); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/recommend/SlimUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/recommend/SlimUDTF.java b/core/src/main/java/hivemall/recommend/SlimUDTF.java index 395221f..1e84dc8 100644 --- a/core/src/main/java/hivemall/recommend/SlimUDTF.java +++ b/core/src/main/java/hivemall/recommend/SlimUDTF.java @@ -24,6 +24,7 @@ import hivemall.common.ConversionState; import hivemall.math.matrix.FloatMatrix; import hivemall.math.matrix.sparse.floats.DoKFloatMatrix; import hivemall.math.vector.VectorProcedure; +import hivemall.utils.collections.Fastutil; import hivemall.utils.hadoop.HiveUtils; import hivemall.utils.io.FileUtils; import hivemall.utils.io.NioStatefullSegment; @@ -34,10 +35,8 @@ import hivemall.utils.lang.mutable.MutableDouble; import hivemall.utils.lang.mutable.MutableInt; import hivemall.utils.lang.mutable.MutableObject; import it.unimi.dsi.fastutil.ints.Int2FloatMap; -import it.unimi.dsi.fastutil.ints.Int2FloatMaps; import it.unimi.dsi.fastutil.ints.Int2FloatOpenHashMap; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectMaps; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntSet; @@ -79,8 +78,7 @@ import org.apache.hadoop.mapred.Reporter; * Xia Ning and George Karypis, SLIM: Sparse Linear Methods for Top-N Recommender Systems, Proc. ICDM, 2011. * </pre> */ -@Description( - name = "train_slim", +@Description(name = "train_slim", value = "_FUNC_( int i, map<int, double> r_i, map<int, map<int, double>> topKRatesOfI, int j, map<int, double> r_j [, constant string options]) " + "- Returns row index, column index and non-zero weight value of prediction model") public class SlimUDTF extends UDTFWithOptions { @@ -174,8 +172,10 @@ public class SlimUDTF extends UDTFWithOptions { this.knnItemsOI = HiveUtils.asMapOI(argOIs[2]); this.knnItemsKeyOI = HiveUtils.asIntCompatibleOI(knnItemsOI.getMapKeyObjectInspector()); this.knnItemsValueOI = HiveUtils.asMapOI(knnItemsOI.getMapValueObjectInspector()); - this.knnItemsValueKeyOI = HiveUtils.asIntCompatibleOI(knnItemsValueOI.getMapKeyObjectInspector()); - this.knnItemsValueValueOI = HiveUtils.asDoubleCompatibleOI(knnItemsValueOI.getMapValueObjectInspector()); + this.knnItemsValueKeyOI = + HiveUtils.asIntCompatibleOI(knnItemsValueOI.getMapKeyObjectInspector()); + this.knnItemsValueValueOI = + HiveUtils.asDoubleCompatibleOI(knnItemsValueOI.getMapValueObjectInspector()); this.itemJOI = HiveUtils.asIntCompatibleOI(argOIs[3]); @@ -245,8 +245,8 @@ public class SlimUDTF extends UDTFWithOptions { numIterations = Primitives.parseInt(cl.getOptionValue("iters"), numIterations); if (numIterations <= 0) { - throw new UDFArgumentException("Argument `int iters` must be greater than 0: " - + numIterations); + throw new UDFArgumentException( + "Argument `int iters` must be greater than 0: " + numIterations); } conversionCheck = !cl.hasOption("disable_cvtest"); @@ -280,8 +280,8 @@ public class SlimUDTF extends UDTFWithOptions { if (itemI != _previousItemId || _ri == null) { // cache Ri and kNNi - this._ri = int2floatMap(itemI, riOI.getMap(args[1]), riKeyOI, riValueOI, _dataMatrix, - _ri); + this._ri = + int2floatMap(itemI, riOI.getMap(args[1]), riKeyOI, riValueOI, _dataMatrix, _ri); this._kNNi = kNNentries(args[2], knnItemsOI, knnItemsKeyOI, knnItemsValueOI, knnItemsValueKeyOI, knnItemsValueValueOI, _kNNi, _nnzKNNi); @@ -293,7 +293,8 @@ public class SlimUDTF extends UDTFWithOptions { } int itemJ = PrimitiveObjectInspectorUtils.getInt(args[3], itemJOI); - Int2FloatMap rj = int2floatMap(itemJ, rjOI.getMap(args[4]), rjKeyOI, rjValueOI, _dataMatrix); + Int2FloatMap rj = + int2floatMap(itemJ, rjOI.getMap(args[4]), rjKeyOI, rjValueOI, _dataMatrix); train(itemI, _ri, _kNNi, itemJ, rj); _observedTrainingExamples++; @@ -312,8 +313,8 @@ public class SlimUDTF extends UDTFWithOptions { file = File.createTempFile("hivemall_slim", ".sgmt"); // to save KNN data file.deleteOnExit(); if (!file.canWrite()) { - throw new UDFArgumentException("Cannot write a temporary file: " - + file.getAbsolutePath()); + throw new UDFArgumentException( + "Cannot write a temporary file: " + file.getAbsolutePath()); } } catch (IOException ioe) { throw new UDFArgumentException(ioe); @@ -336,13 +337,13 @@ public class SlimUDTF extends UDTFWithOptions { buf.putInt(itemI); buf.putInt(knnItems.size()); - for (Int2ObjectMap.Entry<Int2FloatMap> e1 : Int2ObjectMaps.fastIterable(knnItems)) { + for (Int2ObjectMap.Entry<Int2FloatMap> e1 : Fastutil.fastIterable(knnItems)) { int user = e1.getIntKey(); buf.putInt(user); Int2FloatMap ru = e1.getValue(); buf.putInt(ru.size()); - for (Int2FloatMap.Entry e2 : Int2FloatMaps.fastIterable(ru)) { + for (Int2FloatMap.Entry e2 : Fastutil.fastIterable(ru)) { buf.putInt(e2.getIntKey()); buf.putFloat(e2.getFloatValue()); } @@ -374,10 +375,10 @@ public class SlimUDTF extends UDTFWithOptions { double rateSum = 0.d; double lossSum = 0.d; - for (Int2FloatMap.Entry e : Int2FloatMaps.fastIterable(rj)) { + for (Int2FloatMap.Entry e : Fastutil.fastIterable(rj)) { int user = e.getIntKey(); double ruj = e.getFloatValue(); - double rui = ri.getOrDefault(user, 0.f); + double rui = ri.get(user); // ri.getOrDefault(user, 0.f); double eui = rui - predict(user, itemI, kNNi, itemJ, W); gradSum += ruj * eui; @@ -440,7 +441,7 @@ public class SlimUDTF extends UDTFWithOptions { } double pred = 0.d; - for (Int2FloatMap.Entry e : Int2FloatMaps.fastIterable(kNNu)) { + for (Int2FloatMap.Entry e : Fastutil.fastIterable(kNNu)) { final int itemK = e.getIntKey(); if (itemK == excludeIndex) { continue; @@ -451,8 +452,8 @@ public class SlimUDTF extends UDTFWithOptions { return pred; } - private static double getUpdateTerm(final double gradSum, final double rateSum, - final double l1, final double l2) { + private static double getUpdateTerm(final double gradSum, final double rateSum, final double l1, + final double l2) { double update = 0.d; if (Math.abs(gradSum) > l1) { if (gradSum > 0.d) { @@ -494,8 +495,8 @@ public class SlimUDTF extends UDTFWithOptions { assert (dst != null); final Reporter reporter = getReporter(); - final Counters.Counter iterCounter = (reporter == null) ? null : reporter.getCounter( - "hivemall.recommend.slim$Counter", "iteration"); + final Counters.Counter iterCounter = (reporter == null) ? null + : reporter.getCounter("hivemall.recommend.slim$Counter", "iteration"); try { if (dst.getPosition() == 0L) {// run iterations w/o temporary file @@ -518,13 +519,12 @@ public class SlimUDTF extends UDTFWithOptions { break; } } - logger.info("Performed " - + _cvState.getCurrentIteration() - + " iterations of " + logger.info("Performed " + _cvState.getCurrentIteration() + " iterations of " + NumberUtils.formatNumber(_observedTrainingExamples) + " training examples on memory (thus " - + NumberUtils.formatNumber(_observedTrainingExamples - * _cvState.getCurrentIteration()) + " training updates in total) "); + + NumberUtils.formatNumber( + _observedTrainingExamples * _cvState.getCurrentIteration()) + + " training updates in total) "); } else { // read training examples in the temporary file and invoke train for each example // write KNNi in buffer to a temporary file @@ -535,17 +535,16 @@ public class SlimUDTF extends UDTFWithOptions { try { dst.flush(); } catch (IOException e) { - throw new HiveException("Failed to flush a file: " - + dst.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to flush a file: " + dst.getFile().getAbsolutePath(), e); } if (logger.isInfoEnabled()) { File tmpFile = dst.getFile(); - logger.info("Wrote KNN entries of axis items to a temporary file for iterative training: " - + tmpFile.getAbsolutePath() - + " (" - + FileUtils.prettyFileSize(tmpFile) - + ")"); + logger.info( + "Wrote KNN entries of axis items to a temporary file for iterative training: " + + tmpFile.getAbsolutePath() + " (" + + FileUtils.prettyFileSize(tmpFile) + ")"); } // run iterations @@ -562,8 +561,8 @@ public class SlimUDTF extends UDTFWithOptions { try { bytesRead = dst.read(buf); } catch (IOException e) { - throw new HiveException("Failed to read a file: " - + dst.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to read a file: " + dst.getFile().getAbsolutePath(), e); } if (bytesRead == 0) { // reached file EOF break; @@ -594,13 +593,12 @@ public class SlimUDTF extends UDTFWithOptions { break; } } - logger.info("Performed " - + _cvState.getCurrentIteration() - + " iterations of " + logger.info("Performed " + _cvState.getCurrentIteration() + " iterations of " + NumberUtils.formatNumber(_observedTrainingExamples) + " training examples on memory and KNNi data on secondary storage (thus " - + NumberUtils.formatNumber(_observedTrainingExamples - * _cvState.getCurrentIteration()) + " training updates in total) "); + + NumberUtils.formatNumber( + _observedTrainingExamples * _cvState.getCurrentIteration()) + + " training updates in total) "); } } catch (Throwable e) { @@ -610,8 +608,8 @@ public class SlimUDTF extends UDTFWithOptions { try { dst.close(true); } catch (IOException e) { - throw new HiveException("Failed to close a file: " - + dst.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to close a file: " + dst.getFile().getAbsolutePath(), e); } this._inputBuf = null; this._fileIO = null; @@ -723,15 +721,16 @@ public class SlimUDTF extends UDTFWithOptions { @Nonnull private static Int2FloatMap int2floatMap(final int item, @Nonnull final Map<?, ?> map, @Nonnull final PrimitiveObjectInspector keyOI, - @Nonnull final PrimitiveObjectInspector valueOI, @Nullable final FloatMatrix dataMatrix) { + @Nonnull final PrimitiveObjectInspector valueOI, + @Nullable final FloatMatrix dataMatrix) { return int2floatMap(item, map, keyOI, valueOI, dataMatrix, null); } @Nonnull private static Int2FloatMap int2floatMap(final int item, @Nonnull final Map<?, ?> map, @Nonnull final PrimitiveObjectInspector keyOI, - @Nonnull final PrimitiveObjectInspector valueOI, - @Nullable final FloatMatrix dataMatrix, @Nullable Int2FloatMap dst) { + @Nonnull final PrimitiveObjectInspector valueOI, @Nullable final FloatMatrix dataMatrix, + @Nullable Int2FloatMap dst) { if (dst == null) { dst = new Int2FloatOpenHashMap(map.size()); dst.defaultReturnValue(0.f); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/utils/collections/Fastutil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/Fastutil.java b/core/src/main/java/hivemall/utils/collections/Fastutil.java new file mode 100644 index 0000000..c6ace89 --- /dev/null +++ b/core/src/main/java/hivemall/utils/collections/Fastutil.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections; + +import it.unimi.dsi.fastutil.ints.Int2FloatMap; +import it.unimi.dsi.fastutil.ints.Int2LongMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.objects.Object2ObjectMap; +import it.unimi.dsi.fastutil.objects.ObjectIterable; +import it.unimi.dsi.fastutil.objects.ObjectIterator; +import it.unimi.dsi.fastutil.objects.ObjectSet; + +import javax.annotation.Nonnull; + +/** + * Helper class for fastutil (http://fastutil.di.unimi.it/) + */ +public final class Fastutil { + + private Fastutil() {} + + @Nonnull + public static ObjectIterable<Int2LongMap.Entry> fastIterable(@Nonnull final Int2LongMap map) { + final ObjectSet<Int2LongMap.Entry> entries = map.int2LongEntrySet(); + return entries instanceof Int2LongMap.FastEntrySet + ? new ObjectIterable<Int2LongMap.Entry>() { + public ObjectIterator<Int2LongMap.Entry> iterator() { + return ((Int2LongMap.FastEntrySet) entries).fastIterator(); + } + } + : entries; + } + + @Nonnull + public static ObjectIterable<Int2FloatMap.Entry> fastIterable(@Nonnull final Int2FloatMap map) { + final ObjectSet<Int2FloatMap.Entry> entries = map.int2FloatEntrySet(); + return entries instanceof Int2FloatMap.FastEntrySet + ? new ObjectIterable<Int2FloatMap.Entry>() { + public ObjectIterator<Int2FloatMap.Entry> iterator() { + return ((Int2FloatMap.FastEntrySet) entries).fastIterator(); + } + } + : entries; + } + + @Nonnull + public static <V> ObjectIterable<Int2ObjectMap.Entry<V>> fastIterable( + @Nonnull final Int2ObjectMap<V> map) { + final ObjectSet<Int2ObjectMap.Entry<V>> entries = map.int2ObjectEntrySet(); + return entries instanceof Int2ObjectMap.FastEntrySet + ? new ObjectIterable<Int2ObjectMap.Entry<V>>() { + public ObjectIterator<Int2ObjectMap.Entry<V>> iterator() { + return ((Int2ObjectMap.FastEntrySet<V>) entries).fastIterator(); + } + } + : entries; + } + + @Nonnull + public static <K, V> ObjectIterable<Object2ObjectMap.Entry<K, V>> fastIterable( + @Nonnull final Object2ObjectMap<K, V> map) { + final ObjectSet<Object2ObjectMap.Entry<K, V>> entries = map.object2ObjectEntrySet(); + return entries instanceof Object2ObjectMap.FastEntrySet + ? new ObjectIterable<Object2ObjectMap.Entry<K, V>>() { + @SuppressWarnings("unchecked") + public ObjectIterator<Object2ObjectMap.Entry<K, V>> iterator() { + return ((Object2ObjectMap.FastEntrySet<K, V>) entries).fastIterator(); + } + } + : entries; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/utils/lambda/Throwing.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/lambda/Throwing.java b/core/src/main/java/hivemall/utils/lambda/Throwing.java deleted file mode 100644 index 795314e..0000000 --- a/core/src/main/java/hivemall/utils/lambda/Throwing.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.lambda; - -import java.util.function.Consumer; - -import javax.annotation.Nonnull; - -public final class Throwing { - - private Throwing() {} - - @Nonnull - public static <T> Consumer<T> rethrow(@Nonnull final ThrowingConsumer<T> consumer) { - return consumer; - } - - /** - * The compiler sees the signature with the throws T inferred to a RuntimeException type, so it - * allows the unchecked exception to propagate. - * - * http://www.baeldung.com/java-sneaky-throws - */ - @SuppressWarnings("unchecked") - @Nonnull - public static <E extends Throwable> void sneakyThrow(@Nonnull Throwable ex) throws E { - throw (E) ex; - } - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/main/java/hivemall/utils/lambda/ThrowingConsumer.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/lambda/ThrowingConsumer.java b/core/src/main/java/hivemall/utils/lambda/ThrowingConsumer.java deleted file mode 100644 index 7efd652..0000000 --- a/core/src/main/java/hivemall/utils/lambda/ThrowingConsumer.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.lambda; - -import java.util.function.Consumer; - -@FunctionalInterface -public interface ThrowingConsumer<T> extends Consumer<T> { - - @Override - default void accept(final T e) { - try { - accept0(e); - } catch (Throwable ex) { - Throwing.sneakyThrow(ex); - } - } - - void accept0(T e) throws Throwable; - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/core/src/test/java/hivemall/utils/lambda/ThrowingTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/lambda/ThrowingTest.java b/core/src/test/java/hivemall/utils/lambda/ThrowingTest.java deleted file mode 100644 index 8eab9f3..0000000 --- a/core/src/test/java/hivemall/utils/lambda/ThrowingTest.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.lambda; - -import static hivemall.utils.lambda.Throwing.rethrow; - -import java.io.IOException; -import java.util.Arrays; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -public class ThrowingTest { - - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - public void testRethrow() { - thrown.expect(IOException.class); - thrown.expectMessage("i=3"); - - Arrays.asList(1, 2, 3).forEach(rethrow(e -> { - int i = e.intValue(); - if (i == 3) { - throw new IOException("i=" + i); - } - })); - } - - @Test(expected = IOException.class) - public void testSneakyThrow() { - Throwing.sneakyThrow(new IOException()); - } - - @Test - public void testThrowingConsumer() { - thrown.expect(IOException.class); - thrown.expectMessage("i=3"); - - Arrays.asList(1, 2, 3).forEach((ThrowingConsumer<Integer>) e -> { - int i = e.intValue(); - if (i == 3) { - throw new IOException("i=" + i); - } - }); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/docs/gitbook/getting_started/installation.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/getting_started/installation.md b/docs/gitbook/getting_started/installation.md index 26db29c..ee07afb 100644 --- a/docs/gitbook/getting_started/installation.md +++ b/docs/gitbook/getting_started/installation.md @@ -22,7 +22,7 @@ Prerequisites * Hadoop v2.4.0 or later * Hive v0.13 or later -* Java 8 or later (Java 7 in v0.4.2 or before) +* Java 7 or later * [hivemall-core-xxx-with-dependencies.jar](https://github.com/myui/hivemall/releases) * [define-all.hive](https://github.com/myui/hivemall/releases) http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b9e6bae/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 9e26145..64aec8f 100644 --- a/pom.xml +++ b/pom.xml @@ -252,8 +252,8 @@ </modules> <properties> - <java.source.version>1.8</java.source.version> - <java.target.version>1.8</java.target.version> + <java.source.version>1.7</java.source.version> + <java.target.version>1.7</java.target.version> <scala.version>2.11.8</scala.version> <scala.binary.version>2.11</scala.binary.version> <maven.build.timestamp.format>yyyy</maven.build.timestamp.format> @@ -589,7 +589,6 @@ Cobertura does not support Java 8 yet https://github.com/cobertura/cobertura/issues/166 --> - <!-- <plugin> <groupId>org.codehaus.mojo</groupId> <artifactId>cobertura-maven-plugin</artifactId> @@ -600,7 +599,7 @@ <aggregate>true</aggregate> </configuration> </plugin> - --> + <!-- <plugin> <groupId>org.jacoco</groupId> <artifactId>jacoco-maven-plugin</artifactId> @@ -614,6 +613,7 @@ </execution> </executions> </plugin> + --> <!-- end overalls --> <!-- start sonatype deploy --> <!-- mvn clean deploy -DperformRelease=true -Dskiptests=true -Dmaven.test.skip=true -->
