Repository: incubator-hivemall Updated Branches: refs/heads/master 50b4c9a75 -> a780fcdbf
[HIVEMALL-101] Fixed CI errors Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/a780fcdb Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/a780fcdb Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/a780fcdb Branch: refs/heads/master Commit: a780fcdbf44274bc1be37a9818f822bc27243031 Parents: 50b4c9a Author: Makoto Yui <[email protected]> Authored: Thu Jun 15 17:11:11 2017 +0900 Committer: Makoto Yui <[email protected]> Committed: Thu Jun 15 17:11:11 2017 +0900 ---------------------------------------------------------------------- .../classifier/GeneralClassifierUDTF.java | 12 ++- .../optimizer/DenseOptimizerFactory.java | 7 ++ .../java/hivemall/optimizer/LossFunctions.java | 82 ++++++++++++++++---- .../optimizer/SparseOptimizerFactory.java | 7 ++ .../regression/GeneralRegressionUDTF.java | 3 +- .../classifier/GeneralClassifierUDTFTest.java | 8 +- .../regression/GeneralRegressionUDTFTest.java | 7 +- 7 files changed, 97 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a780fcdb/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java b/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java index d7cb539..e5f06d8 100644 --- a/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java +++ b/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java @@ -41,8 +41,8 @@ public final class GeneralClassifierUDTF extends GeneralLearnerBaseUDTF { @Override protected String getLossOptionDescription() { - return "Loss function [default: HingeLoss, LogLoss, SquaredHingeLoss, ModifiedHuberLoss, " - + "SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, HuberLoss]"; + return "Loss function [HingeLoss (default), LogLoss, SquaredHingeLoss, ModifiedHuberLoss, \n" + + ", or a regression loss: SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, HuberLoss]"; } @Override @@ -51,11 +51,9 @@ public final class GeneralClassifierUDTF extends GeneralLearnerBaseUDTF { } @Override - protected void checkLossFunction(LossFunction lossFunction) throws UDFArgumentException { - if(!lossFunction.forBinaryClassification()) { - throw new UDFArgumentException("The loss function `" + lossFunction.getType() - + "` is not designed for binary classification"); - } + protected void checkLossFunction(@Nonnull LossFunction lossFunction) + throws UDFArgumentException { + // will accepts both binary loss and } @Override http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a780fcdb/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java b/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java index e273f91..775d7d0 100644 --- a/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java +++ b/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java @@ -42,6 +42,13 @@ public final class DenseOptimizerFactory { throw new IllegalArgumentException("`optimizer` not defined"); } + if ("rda".equalsIgnoreCase(options.get("regularization")) + && "adagrad".equalsIgnoreCase(optimizerName) == false) { + throw new IllegalArgumentException( + "`-regularization rda` is only supported for AdaGrad but `-optimizer " + + optimizerName); + } + final Optimizer optimizerImpl; if ("sgd".equalsIgnoreCase(optimizerName)) { optimizerImpl = new Optimizer.SGD(options); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a780fcdb/core/src/main/java/hivemall/optimizer/LossFunctions.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/LossFunctions.java b/core/src/main/java/hivemall/optimizer/LossFunctions.java index a1ade3d..46c07ba 100644 --- a/core/src/main/java/hivemall/optimizer/LossFunctions.java +++ b/core/src/main/java/hivemall/optimizer/LossFunctions.java @@ -29,27 +29,32 @@ import javax.annotation.Nullable; public final class LossFunctions { public enum LossType { - SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, HuberLoss, HingeLoss, LogLoss, - SquaredHingeLoss, ModifiedHuberLoss + SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, SquaredEpsilonInsensitiveLoss, + HuberLoss, HingeLoss, LogLoss, SquaredHingeLoss, ModifiedHuberLoss } @Nonnull public static LossFunction getLossFunction(@Nullable final String type) { - if ("SquaredLoss".equalsIgnoreCase(type)) { + final String t = type.toLowerCase(); + if ("squaredloss".equals(t) || "squared".equals(t)) { return new SquaredLoss(); - } else if ("QuantileLoss".equalsIgnoreCase(type)) { + } else if ("quantileloss".equals(t) || "quantile".equals(t)) { return new QuantileLoss(); - } else if ("EpsilonInsensitiveLoss".equalsIgnoreCase(type)) { + } else if ("epsiloninsensitiveloss".equals(t) || "epsilon_insensitive".equals(t)) { return new EpsilonInsensitiveLoss(); - } else if ("HuberLoss".equalsIgnoreCase(type)) { + } else if ("squaredepsiloninsensitiveloss".equals(t) + || "squared_epsilon_insensitive".equals(t)) { + return new SquaredEpsilonInsensitiveLoss(); + } else if ("huberloss".equals(t) || "huber".equals(t)) { return new HuberLoss(); - } else if ("HingeLoss".equalsIgnoreCase(type)) { + } else if ("hingeloss".equals(t) || "hinge".equals(t)) { return new HingeLoss(); - } else if ("LogLoss".equalsIgnoreCase(type) || "LogisticLoss".equalsIgnoreCase(type)) { + } else if ("logloss".equals(t) || "log".equals(t) || "logisticloss".equals(t) + || "logistic".equals(t)) { return new LogLoss(); - } else if ("SquaredHingeLoss".equalsIgnoreCase(type)) { + } else if ("squaredhingeloss".equals(t) || "squared_hinge".equals(t)) { return new SquaredHingeLoss(); - } else if ("ModifiedHuberLoss".equalsIgnoreCase(type)) { + } else if ("modifiedhuberloss".equals(t) || "modified_huber".equals(t)) { return new ModifiedHuberLoss(); } throw new IllegalArgumentException("Unsupported loss function name: " + type); @@ -64,6 +69,8 @@ public final class LossFunctions { return new QuantileLoss(); case EpsilonInsensitiveLoss: return new EpsilonInsensitiveLoss(); + case SquaredEpsilonInsensitiveLoss: + return new SquaredEpsilonInsensitiveLoss(); case HuberLoss: return new HuberLoss(); case HingeLoss: @@ -272,11 +279,11 @@ public final class LossFunctions { public float dloss(final float p, final float y) { if ((y - p) > epsilon) {// real value > predicted value - epsilon return -1.f; - } - if ((p - y) > epsilon) {// real value < predicted value - epsilon + } else if ((p - y) > epsilon) {// real value < predicted value - epsilon return 1.f; + } else { + return 0.f; } - return 0.f; } @Override @@ -286,6 +293,55 @@ public final class LossFunctions { } /** + * Squared Epsilon-Insensitive loss. <code>loss = max(0, |y - p| - epsilon)^2</code> + */ + public static final class SquaredEpsilonInsensitiveLoss extends RegressionLoss { + + private float epsilon; + + public SquaredEpsilonInsensitiveLoss() { + this(0.1f); + } + + public SquaredEpsilonInsensitiveLoss(float epsilon) { + this.epsilon = epsilon; + } + + public void setEpsilon(float epsilon) { + this.epsilon = epsilon; + } + + @Override + public float loss(final float p, final float y) { + float d = Math.abs(y - p) - epsilon; + return (d > 0.f) ? (d * d) : 0.f; + } + + @Override + public double loss(final double p, final double y) { + double d = Math.abs(y - p) - epsilon; + return (d > 0.d) ? (d * d) : 0.d; + } + + @Override + public float dloss(final float p, final float y) { + final float z = y - p; + if (z > epsilon) { + return -2 * (z - epsilon); + } else if (-z > epsilon) { + return 2 * (-z - epsilon); + } else { + return 0.f; + } + } + + @Override + public LossType getType() { + return LossType.SquaredEpsilonInsensitiveLoss; + } + } + + /** * Huber regression loss. * * Variant of the SquaredLoss which is robust to outliers. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a780fcdb/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java b/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java index 7bcac1b..12e0d71 100644 --- a/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java +++ b/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java @@ -40,6 +40,13 @@ public final class SparseOptimizerFactory { throw new IllegalArgumentException("`optimizer` not defined"); } + if ("rda".equalsIgnoreCase(options.get("regularization")) + && "adagrad".equalsIgnoreCase(optimizerName) == false) { + throw new IllegalArgumentException( + "`-regularization rda` is only supported for AdaGrad but `-optimizer " + + optimizerName); + } + final Optimizer optimizerImpl; if ("sgd".equalsIgnoreCase(optimizerName)) { optimizerImpl = new Optimizer.SGD(options); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a780fcdb/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java b/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java index 160d92d..1bd9393 100644 --- a/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java +++ b/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java @@ -41,7 +41,8 @@ public final class GeneralRegressionUDTF extends GeneralLearnerBaseUDTF { @Override protected String getLossOptionDescription() { - return "Loss function [default: SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, HuberLoss]"; + return "Loss function [default: SquaredLoss/squared, QuantileLoss/quantile, " + + "EpsilonInsensitiveLoss/epsilon_insensitive, HuberLoss/huber]"; } @Override http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a780fcdb/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java b/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java index 6ed783c..1c7a90e 100644 --- a/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java +++ b/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java @@ -18,6 +18,8 @@ */ package hivemall.classifier; +import hivemall.utils.math.MathUtils; + import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; @@ -29,7 +31,7 @@ import java.util.List; import java.util.StringTokenizer; import java.util.zip.GZIPInputStream; -import hivemall.utils.math.MathUtils; +import javax.annotation.Nonnull; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -41,8 +43,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.junit.Assert; import org.junit.Test; -import javax.annotation.Nonnull; - public class GeneralClassifierUDTFTest { private static final boolean DEBUG = false; @@ -148,7 +148,7 @@ public class GeneralClassifierUDTFTest { String[] regularizations = new String[] {"NO", "L1", "L2", "ElasticNet", "RDA"}; String[] lossFunctions = new String[] {"HingeLoss", "LogLoss", "SquaredHingeLoss", "ModifiedHuberLoss", "SquaredLoss", "QuantileLoss", "EpsilonInsensitiveLoss", - "HuberLoss"}; + "SquaredEpsilonInsensitiveLoss", "HuberLoss"}; for (String opt : optimizers) { for (String reg : regularizations) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a780fcdb/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java b/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java index df5c643..cfe9651 100644 --- a/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java +++ b/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java @@ -22,18 +22,17 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import javax.annotation.Nonnull; + import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; - import org.junit.Assert; import org.junit.Test; -import javax.annotation.Nonnull; - public class GeneralRegressionUDTFTest { private static final boolean DEBUG = false; @@ -156,7 +155,7 @@ public class GeneralRegressionUDTFTest { String[] optimizers = new String[] {"SGD", "AdaDelta", "AdaGrad", "Adam"}; String[] regularizations = new String[] {"NO", "L1", "L2", "ElasticNet", "RDA"}; String[] lossFunctions = new String[] {"SquaredLoss", "QuantileLoss", - "EpsilonInsensitiveLoss", "HuberLoss"}; + "EpsilonInsensitiveLoss", "SquaredEpsilonInsensitiveLoss", "HuberLoss"}; for (String opt : optimizers) { for (String reg : regularizations) {
