[HIVEMALL-5] Fixed style and applied mvn formatter:format
Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/e9c66f0a Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/e9c66f0a Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/e9c66f0a Branch: refs/heads/master Commit: e9c66f0a1f3789cabd06e3b9fcaa56d52f05c2a0 Parents: b8a683c Author: Makoto Yui <[email protected]> Authored: Wed Jul 5 20:51:55 2017 +0900 Committer: Makoto Yui <[email protected]> Committed: Wed Jul 5 21:10:18 2017 +0900 ---------------------------------------------------------------------- .../java/hivemall/anomaly/ChangeFinderUDF.java | 2 +- .../anomaly/SingularSpectrumTransform.java | 2 +- .../anomaly/SingularSpectrumTransformUDF.java | 6 +- .../main/java/hivemall/evaluation/MAPUDAF.java | 4 +- .../main/java/hivemall/evaluation/MRRUDAF.java | 4 +- .../main/java/hivemall/evaluation/NDCGUDAF.java | 8 +- .../java/hivemall/evaluation/PrecisionUDAF.java | 4 +- .../java/hivemall/evaluation/RecallUDAF.java | 4 +- .../ftvec/binning/NumericHistogram.java | 5 +- .../ftvec/scaling/L2NormalizationUDF.java | 4 +- .../hivemall/ftvec/selection/ChiSquareUDF.java | 10 +- .../main/java/hivemall/model/WeightValue.java | 4 +- .../java/hivemall/optimizer/EtaEstimator.java | 3 +- .../java/hivemall/optimizer/LossFunctions.java | 10 +- .../main/java/hivemall/optimizer/Optimizer.java | 7 +- .../hivemall/regression/RegressionBaseUDTF.java | 3 +- .../smile/classification/DecisionTree.java | 105 ++++++----- .../RandomForestClassifierUDTF.java | 3 +- .../regression/RandomForestRegressionUDTF.java | 3 +- .../smile/regression/RegressionTree.java | 79 +++++---- .../java/hivemall/tools/math/L2NormUDAF.java | 3 +- .../hivemall/topicmodel/OnlineLDAModel.java | 3 +- .../utils/collections/arrays/DoubleArray.java | 2 +- .../hivemall/utils/collections/sets/IntSet.java | 2 +- .../java/hivemall/utils/lang/ArrayUtils.java | 8 +- 
.../java/hivemall/utils/math/MathUtils.java | 18 +- .../java/hivemall/utils/math/MatrixUtils.java | 10 +- .../utils/sampling/IntReservoirSampler.java | 3 +- .../hivemall/anomaly/ChangeFinder2DTest.java | 2 +- .../evaluation/BinaryResponsesMeasuresTest.java | 9 +- .../evaluation/GradedResponsesMeasuresTest.java | 3 +- .../knn/similarity/DIMSUMMapperUDTFTest.java | 3 +- .../hivemall/utils/codec/DeflateCodecTest.java | 3 +- .../java/hivemall/mix/server/MixServerTest.java | 4 +- .../java/hivemall/nlp/tokenizer/SmartcnUDF.java | 174 +++++++++---------- .../hivemall/nlp/tokenizer/SmartcnUDFTest.java | 84 +++++---- resources/eclipse-style.xml | 2 +- .../java/hivemall/xgboost/NativeLibLoader.java | 45 +++-- .../hivemall/xgboost/XGBoostPredictUDTF.java | 33 ++-- .../main/java/hivemall/xgboost/XGBoostUDTF.java | 74 ++++---- .../java/hivemall/xgboost/XGBoostUtils.java | 8 +- .../XGBoostBinaryClassifierUDTF.java | 11 +- .../XGBoostMulticlassClassifierUDTF.java | 27 ++- .../regression/XGBoostRegressionUDTF.java | 11 +- .../tools/XGBoostMulticlassPredictUDTF.java | 20 +-- .../xgboost/tools/XGBoostPredictUDTF.java | 18 +- 46 files changed, 457 insertions(+), 393 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/anomaly/ChangeFinderUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/anomaly/ChangeFinderUDF.java b/core/src/main/java/hivemall/anomaly/ChangeFinderUDF.java index e786e86..b6c71c9 100644 --- a/core/src/main/java/hivemall/anomaly/ChangeFinderUDF.java +++ b/core/src/main/java/hivemall/anomaly/ChangeFinderUDF.java @@ -52,7 +52,7 @@ import org.apache.hadoop.io.BooleanWritable; + " - Returns outlier/change-point scores and decisions using ChangeFinder." 
+ " It will return a tuple <double outlier_score, double changepoint_score [, boolean is_anomaly [, boolean is_changepoint]]") @UDFType(deterministic = false, stateful = true) -@Since(version="0.5-rc.1") +@Since(version = "0.5-rc.1") public final class ChangeFinderUDF extends UDFWithOptions { private transient Parameters _params; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java b/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java index ba1c9c0..34d85aa 100644 --- a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java +++ b/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java @@ -190,7 +190,7 @@ final class SingularSpectrumTransform implements SingularSpectrumTransformInterf double s = 0.d; for (int i = 0; i < r; i++) { - if(!indicies.hasNext()) { + if (!indicies.hasNext()) { throw new IllegalStateException("Should not happen"); } double v = eigvecs.getEntry(0, indicies.next().intValue()); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java b/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java index 7b1f9e3..d718e12 100644 --- a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java +++ b/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java @@ -49,7 +49,9 @@ import org.apache.hadoop.io.BooleanWritable; * * References: * <ul> - * <li>T. Ide and K. Inoue, "Knowledge Discovery from Heterogeneous Dynamic Systems using Change-Point Correlations", SDM'05.</li> + * <li>T. Ide and K. 
Inoue, + * "Knowledge Discovery from Heterogeneous Dynamic Systems using Change-Point Correlations", SDM'05. + * </li> * <li>T. Ide and K. Tsuda, "Change-point detection using Krylov subspace learning", SDM'07.</li> * </ul> */ @@ -59,7 +61,7 @@ import org.apache.hadoop.io.BooleanWritable; + " - Returns change-point scores and decisions using Singular Spectrum Transformation (SST)." + " It will return a tuple <double changepoint_score [, boolean is_changepoint]>") @UDFType(deterministic = false, stateful = true) -@Since(version="0.5-rc.1") +@Since(version = "0.5-rc.1") public final class SingularSpectrumTransformUDF extends UDFWithOptions { private transient Parameters _params; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/evaluation/MAPUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/MAPUDAF.java b/core/src/main/java/hivemall/evaluation/MAPUDAF.java index a7fe5a9..cac6de5 100644 --- a/core/src/main/java/hivemall/evaluation/MAPUDAF.java +++ b/core/src/main/java/hivemall/evaluation/MAPUDAF.java @@ -65,12 +65,12 @@ public final class MAPUDAF extends AbstractGenericUDAFResolver { ListTypeInfo arg1type = HiveUtils.asListTypeInfo(typeInfo[0]); if (!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(0, - "The first argument `array rankItems` is invalid form: " + typeInfo[0]); + "The first argument `array rankItems` is invalid form: " + typeInfo[0]); } ListTypeInfo arg2type = HiveUtils.asListTypeInfo(typeInfo[1]); if (!HiveUtils.isPrimitiveTypeInfo(arg2type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(1, - "The second argument `array correctItems` is invalid form: " + typeInfo[1]); + "The second argument `array correctItems` is invalid form: " + typeInfo[1]); } return new Evaluator(); 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/evaluation/MRRUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/MRRUDAF.java b/core/src/main/java/hivemall/evaluation/MRRUDAF.java index 68b1df5..41a236d 100644 --- a/core/src/main/java/hivemall/evaluation/MRRUDAF.java +++ b/core/src/main/java/hivemall/evaluation/MRRUDAF.java @@ -65,12 +65,12 @@ public final class MRRUDAF extends AbstractGenericUDAFResolver { ListTypeInfo arg1type = HiveUtils.asListTypeInfo(typeInfo[0]); if (!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(0, - "The first argument `array rankItems` is invalid form: " + typeInfo[0]); + "The first argument `array rankItems` is invalid form: " + typeInfo[0]); } ListTypeInfo arg2type = HiveUtils.asListTypeInfo(typeInfo[1]); if (!HiveUtils.isPrimitiveTypeInfo(arg2type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(1, - "The second argument `array correctItems` is invalid form: " + typeInfo[1]); + "The second argument `array correctItems` is invalid form: " + typeInfo[1]); } return new Evaluator(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/evaluation/NDCGUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/NDCGUDAF.java b/core/src/main/java/hivemall/evaluation/NDCGUDAF.java index 8c0d590..f50d27a 100644 --- a/core/src/main/java/hivemall/evaluation/NDCGUDAF.java +++ b/core/src/main/java/hivemall/evaluation/NDCGUDAF.java @@ -64,15 +64,15 @@ public final class NDCGUDAF extends AbstractGenericUDAFResolver { } ListTypeInfo arg1type = HiveUtils.asListTypeInfo(typeInfo[0]); - if (!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo()) - && !HiveUtils.isStructTypeInfo(arg1type.getListElementTypeInfo())) { + if 
(!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo()) + && !HiveUtils.isStructTypeInfo(arg1type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(0, - "The first argument `array rankItems` is invalid form: " + typeInfo[0]); + "The first argument `array rankItems` is invalid form: " + typeInfo[0]); } ListTypeInfo arg2type = HiveUtils.asListTypeInfo(typeInfo[1]); if (!HiveUtils.isPrimitiveTypeInfo(arg2type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(1, - "The second argument `array correctItems` is invalid form: " + typeInfo[1]); + "The second argument `array correctItems` is invalid form: " + typeInfo[1]); } return new Evaluator(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java index 69d901a..d4fad41 100644 --- a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java +++ b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java @@ -65,12 +65,12 @@ public final class PrecisionUDAF extends AbstractGenericUDAFResolver { ListTypeInfo arg1type = HiveUtils.asListTypeInfo(typeInfo[0]); if (!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(0, - "The first argument `array rankItems` is invalid form: " + typeInfo[0]); + "The first argument `array rankItems` is invalid form: " + typeInfo[0]); } ListTypeInfo arg2type = HiveUtils.asListTypeInfo(typeInfo[1]); if (!HiveUtils.isPrimitiveTypeInfo(arg2type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(1, - "The second argument `array correctItems` is invalid form: " + typeInfo[1]); + "The second argument `array correctItems` is invalid form: " + typeInfo[1]); } return new Evaluator(); 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/evaluation/RecallUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/RecallUDAF.java b/core/src/main/java/hivemall/evaluation/RecallUDAF.java index c908a49..c07b858 100644 --- a/core/src/main/java/hivemall/evaluation/RecallUDAF.java +++ b/core/src/main/java/hivemall/evaluation/RecallUDAF.java @@ -65,12 +65,12 @@ public final class RecallUDAF extends AbstractGenericUDAFResolver { ListTypeInfo arg1type = HiveUtils.asListTypeInfo(typeInfo[0]); if (!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(0, - "The first argument `array rankItems` is invalid form: " + typeInfo[0]); + "The first argument `array rankItems` is invalid form: " + typeInfo[0]); } ListTypeInfo arg2type = HiveUtils.asListTypeInfo(typeInfo[1]); if (!HiveUtils.isPrimitiveTypeInfo(arg2type.getListElementTypeInfo())) { throw new UDFArgumentTypeException(1, - "The second argument `array correctItems` is invalid form: " + typeInfo[1]); + "The second argument `array correctItems` is invalid form: " + typeInfo[1]); } return new Evaluator(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java b/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java index 7e9a2c1..51b9368 100644 --- a/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java +++ b/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java @@ -19,14 +19,15 @@ package hivemall.ftvec.binning; import hivemall.utils.lang.SizeOf; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; 
import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Random; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; + /** * **THIS CLASS IS IMPORTED FROM HIVE 2.1.0 FOR COMPATIBILITY** * http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDF.java b/core/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDF.java index fe4acf7..8b05a36 100644 --- a/core/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDF.java +++ b/core/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDF.java @@ -27,9 +27,7 @@ import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.Text; /** - * @see <a href= - * "http://mathworld.wolfram.com/NormalizedVector.html>http://mathworld.wolfram.com/NormalizedVector.html - * < / a > + * @see http://mathworld.wolfram.com/NormalizedVector.html */ @Description(name = "l2_normalize", value = "_FUNC_(ftvec string) - Returned a L2 normalized value") @UDFType(deterministic = true, stateful = false) http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java index 9ada4e5..ca622bc 100644 --- a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java +++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java @@ -61,7 +61,7 @@ public final class ChiSquareUDF extends GenericUDF { private double[] expectedRow = null; // to reuse private double[][] observed = null; // shape = (#features, 
#classes) private double[][] expected = null; // shape = (#features, #classes) - + private List<DoubleWritable>[] result; @SuppressWarnings("unchecked") @@ -86,13 +86,13 @@ public final class ChiSquareUDF extends GenericUDF { this.observedElOI = HiveUtils.asDoubleCompatibleOI(observedRowOI.getListElementObjectInspector()); this.expectedOI = HiveUtils.asListOI(OIs[0]); this.expectedRowOI = HiveUtils.asListOI(expectedOI.getListElementObjectInspector()); - this.expectedElOI = HiveUtils.asDoubleCompatibleOI(expectedRowOI.getListElementObjectInspector()); + this.expectedElOI = HiveUtils.asDoubleCompatibleOI(expectedRowOI.getListElementObjectInspector()); this.result = new List[2]; - + List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(); fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); - + return ObjectInspectorFactory.getStandardStructObjectInspector( Arrays.asList("chi2", "pvalue"), fieldOIs); } @@ -139,7 +139,7 @@ public final class ChiSquareUDF extends GenericUDF { } Map.Entry<double[], double[]> chi2 = StatsUtils.chiSquare(observed, expected); - + result[0] = WritableUtils.toWritableList(chi2.getKey(), result[0]); result[1] = WritableUtils.toWritableList(chi2.getValue(), result[1]); return result; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/model/WeightValue.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/model/WeightValue.java b/core/src/main/java/hivemall/model/WeightValue.java index 2fee76a..3d09a56 100644 --- a/core/src/main/java/hivemall/model/WeightValue.java +++ b/core/src/main/java/hivemall/model/WeightValue.java @@ -303,9 +303,9 @@ public class WeightValue implements IWeightValue { @Override public float 
getFloatParams(@Nonnegative final int i) { - if(i == 1) { + if (i == 1) { return f1; - } else if(i == 2) { + } else if (i == 2) { return f2; } else if (i == 3) { return f3; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/optimizer/EtaEstimator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/EtaEstimator.java b/core/src/main/java/hivemall/optimizer/EtaEstimator.java index 1a4c07d..3b96de0 100644 --- a/core/src/main/java/hivemall/optimizer/EtaEstimator.java +++ b/core/src/main/java/hivemall/optimizer/EtaEstimator.java @@ -97,7 +97,8 @@ public abstract class EtaEstimator { } /** - * bold driver: Gemulla et al., Large-scale matrix factorization with distributed stochastic gradient descent, KDD 2011. + * bold driver: Gemulla et al., Large-scale matrix factorization with distributed stochastic + * gradient descent, KDD 2011. */ public static final class AdjustingEtaEstimator extends EtaEstimator { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/optimizer/LossFunctions.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/LossFunctions.java b/core/src/main/java/hivemall/optimizer/LossFunctions.java index b7308e0..54d7fe5 100644 --- a/core/src/main/java/hivemall/optimizer/LossFunctions.java +++ b/core/src/main/java/hivemall/optimizer/LossFunctions.java @@ -188,7 +188,8 @@ public final class LossFunctions { } /** - * Quantile loss is useful to predict rank/order and you do not mind the mean error to increase as long as you get the relative order correct. + * Quantile loss is useful to predict rank/order and you do not mind the mean error to increase + * as long as you get the relative order correct. 
* * @link http://en.wikipedia.org/wiki/Quantile_regression */ @@ -247,7 +248,8 @@ public final class LossFunctions { } /** - * Epsilon-Insensitive loss used by Support Vector Regression (SVR). <code>loss = max(0, |y - p| - epsilon)</code> + * Epsilon-Insensitive loss used by Support Vector Regression (SVR). + * <code>loss = max(0, |y - p| - epsilon)</code> */ public static final class EpsilonInsensitiveLoss extends RegressionLoss { @@ -416,8 +418,8 @@ public final class LossFunctions { } /** - * @param threshold Margin threshold. When threshold=1.0, one gets the loss used by SVM. When threshold=0.0, one gets the loss used by the - * Perceptron. + * @param threshold Margin threshold. When threshold=1.0, one gets the loss used by SVM. + * When threshold=0.0, one gets the loss used by the Perceptron. */ public HingeLoss(float threshold) { this.threshold = threshold; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/Optimizer.java b/core/src/main/java/hivemall/optimizer/Optimizer.java index 4b11bd1..bbd2320 100644 --- a/core/src/main/java/hivemall/optimizer/Optimizer.java +++ b/core/src/main/java/hivemall/optimizer/Optimizer.java @@ -172,10 +172,11 @@ public interface Optimizer { } /** - * Adam, an algorithm for first-order gradient-based optimization of stochastic objective functions, based on adaptive estimates of lower-order - * moments. + * Adam, an algorithm for first-order gradient-based optimization of stochastic objective + * functions, based on adaptive estimates of lower-order moments. * - * - D. P. Kingma and J. L. Ba: "ADAM: A Method for Stochastic Optimization." arXiv preprint arXiv:1412.6980v8, 2014. + * - D. P. Kingma and J. L. Ba: "ADAM: A Method for Stochastic Optimization." arXiv preprint + * arXiv:1412.6980v8, 2014. 
*/ static abstract class Adam extends OptimizerBase { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/regression/RegressionBaseUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/regression/RegressionBaseUDTF.java b/core/src/main/java/hivemall/regression/RegressionBaseUDTF.java index eca4cf3..f8fae89 100644 --- a/core/src/main/java/hivemall/regression/RegressionBaseUDTF.java +++ b/core/src/main/java/hivemall/regression/RegressionBaseUDTF.java @@ -52,7 +52,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.io.FloatWritable; /** - * The base class for regression algorithms. RegressionBaseUDTF provides general implementation for online training and batch training. + * The base class for regression algorithms. RegressionBaseUDTF provides general implementation for + * online training and batch training. */ public abstract class RegressionBaseUDTF extends LearnerBaseUDTF { private static final Log logger = LogFactory.getLog(RegressionBaseUDTF.class); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/smile/classification/DecisionTree.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/classification/DecisionTree.java b/core/src/main/java/hivemall/smile/classification/DecisionTree.java index fa97dba..2b723ed 100644 --- a/core/src/main/java/hivemall/smile/classification/DecisionTree.java +++ b/core/src/main/java/hivemall/smile/classification/DecisionTree.java @@ -70,41 +70,57 @@ import smile.classification.Classifier; import smile.math.Math; /** - * Decision tree for classification. A decision tree can be learned by splitting the training set into subsets based on an attribute value test. 
This - * process is repeated on each derived subset in a recursive manner called recursive partitioning. The recursion is completed when the subset at a - * node all has the same value of the target variable, or when splitting no longer adds value to the predictions. + * Decision tree for classification. A decision tree can be learned by splitting the training set + * into subsets based on an attribute value test. This process is repeated on each derived subset in + * a recursive manner called recursive partitioning. The recursion is completed when the subset at a + * node all has the same value of the target variable, or when splitting no longer adds value to the + * predictions. * <p> - * The algorithms that are used for constructing decision trees usually work top-down by choosing a variable at each step that is the next best - * variable to use in splitting the set of items. "Best" is defined by how well the variable splits the set into homogeneous subsets that have the - * same value of the target variable. Different algorithms use different formulae for measuring "best". Used by the CART algorithm, Gini impurity is a - * measure of how often a randomly chosen element from the set would be incorrectly labeled if it were randomly labeled according to the distribution - * of labels in the subset. Gini impurity can be computed by summing the probability of each item being chosen times the probability of a mistake in - * categorizing that item. It reaches its minimum (zero) when all cases in the node fall into a single target category. Information gain is another - * popular measure, used by the ID3, C4.5 and C5.0 algorithms. Information gain is based on the concept of entropy used in information theory. For - * categorical variables with different number of levels, however, information gain are biased in favor of those attributes with more levels. Instead, - * one may employ the information gain ratio, which solves the drawback of information gain. 
+ * The algorithms that are used for constructing decision trees usually work top-down by choosing a + * variable at each step that is the next best variable to use in splitting the set of items. "Best" + * is defined by how well the variable splits the set into homogeneous subsets that have the same + * value of the target variable. Different algorithms use different formulae for measuring "best". + * Used by the CART algorithm, Gini impurity is a measure of how often a randomly chosen element + * from the set would be incorrectly labeled if it were randomly labeled according to the + * distribution of labels in the subset. Gini impurity can be computed by summing the probability of + * each item being chosen times the probability of a mistake in categorizing that item. It reaches + * its minimum (zero) when all cases in the node fall into a single target category. Information + * gain is another popular measure, used by the ID3, C4.5 and C5.0 algorithms. Information gain is + * based on the concept of entropy used in information theory. For categorical variables with + * different number of levels, however, information gain are biased in favor of those attributes + * with more levels. Instead, one may employ the information gain ratio, which solves the drawback + * of information gain. * <p> - * Classification and Regression Tree techniques have a number of advantages over many of those alternative techniques. + * Classification and Regression Tree techniques have a number of advantages over many of those + * alternative techniques. * <dl> * <dt>Simple to understand and interpret.</dt> - * <dd>In most cases, the interpretation of results summarized in a tree is very simple. 
This simplicity is useful not only for purposes of rapid - * classification of new observations, but can also often yield a much simpler "model" for explaining why observations are classified or predicted in - * a particular manner.</dd> + * <dd>In most cases, the interpretation of results summarized in a tree is very simple. This + * simplicity is useful not only for purposes of rapid classification of new observations, but can + * also often yield a much simpler "model" for explaining why observations are classified or + * predicted in a particular manner.</dd> * <dt>Able to handle both numerical and categorical data.</dt> - * <dd>Other techniques are usually specialized in analyzing datasets that have only one type of variable.</dd> + * <dd>Other techniques are usually specialized in analyzing datasets that have only one type of + * variable.</dd> * <dt>Tree methods are nonparametric and nonlinear.</dt> - * <dd>The final results of using tree methods for classification or regression can be summarized in a series of (usually few) logical if-then - * conditions (tree nodes). Therefore, there is no implicit assumption that the underlying relationships between the predictor variables and the - * dependent variable are linear, follow some specific non-linear link function, or that they are even monotonic in nature. Thus, tree methods are - * particularly well suited for data mining tasks, where there is often little a priori knowledge nor any coherent set of theories or predictions - * regarding which variables are related and how. In those types of data analytics, tree methods can often reveal simple relationships between just a - * few variables that could have easily gone unnoticed using other analytic techniques.</dd> + * <dd>The final results of using tree methods for classification or regression can be summarized in + * a series of (usually few) logical if-then conditions (tree nodes). 
Therefore, there is no + * implicit assumption that the underlying relationships between the predictor variables and the + * dependent variable are linear, follow some specific non-linear link function, or that they are + * even monotonic in nature. Thus, tree methods are particularly well suited for data mining tasks, + * where there is often little a priori knowledge nor any coherent set of theories or predictions + * regarding which variables are related and how. In those types of data analytics, tree methods can + * often reveal simple relationships between just a few variables that could have easily gone + * unnoticed using other analytic techniques.</dd> * </dl> - * One major problem with classification and regression trees is their high variance. Often a small change in the data can result in a very different - * series of splits, making interpretation somewhat precarious. Besides, decision-tree learners can create over-complex trees that cause over-fitting. - * Mechanisms such as pruning are necessary to avoid this problem. Another limitation of trees is the lack of smoothness of the prediction surface. + * One major problem with classification and regression trees is their high variance. Often a small + * change in the data can result in a very different series of splits, making interpretation + * somewhat precarious. Besides, decision-tree learners can create over-complex trees that cause + * over-fitting. Mechanisms such as pruning are necessary to avoid this problem. Another limitation + * of trees is the lack of smoothness of the prediction surface. * <p> - * Some techniques such as bagging, boosting, and random forest use more than one decision tree for their analysis. + * Some techniques such as bagging, boosting, and random forest use more than one decision tree for + * their analysis. 
*/ public final class DecisionTree implements Classifier<Vector> { /** @@ -114,8 +130,9 @@ public final class DecisionTree implements Classifier<Vector> { private final Attribute[] _attributes; private final boolean _hasNumericType; /** - * Variable importance. Every time a split of a node is made on variable the (GINI, information gain, etc.) impurity criterion for the two - * descendant nodes is less than the parent node. Adding up the decreases for each individual variable over the tree gives a simple measure of + * Variable importance. Every time a split of a node is made on variable the (GINI, information + * gain, etc.) impurity criterion for the two descendant nodes is less than the parent node. + * Adding up the decreases for each individual variable over the tree gives a simple measure of * variable importance. */ @Nonnull @@ -151,7 +168,8 @@ public final class DecisionTree implements Classifier<Vector> { */ private final int _minLeafSize; /** - * The index of training values in ascending order. Note that only numeric attributes will be sorted. + * The index of training values in ascending order. Note that only numeric attributes will be + * sorted. */ @Nonnull private final ColumnMajorIntMatrix _order; @@ -164,10 +182,12 @@ public final class DecisionTree implements Classifier<Vector> { */ public static enum SplitRule { /** - * Used by the CART algorithm, Gini impurity is a measure of how often a randomly chosen element from the set would be incorrectly labeled if - * it were randomly labeled according to the distribution of labels in the subset. Gini impurity can be computed by summing the probability of - * each item being chosen times the probability of a mistake in categorizing that item. It reaches its minimum (zero) when all cases in the - * node fall into a single target category. 
+ * Used by the CART algorithm, Gini impurity is a measure of how often a randomly chosen + * element from the set would be incorrectly labeled if it were randomly labeled according + * to the distribution of labels in the subset. Gini impurity can be computed by summing the + * probability of each item being chosen times the probability of a mistake in categorizing + * that item. It reaches its minimum (zero) when all cases in the node fall into a single + * target category. */ GINI, /** @@ -903,12 +923,13 @@ public final class DecisionTree implements Classifier<Vector> { * @param attributes the attribute properties. * @param x the training instances. * @param y the response variable. - * @param numVars the number of input variables to pick to split on at each node. It seems that dim/3 give generally good performance, where dim - * is the number of variables. + * @param numVars the number of input variables to pick to split on at each node. It seems that + * dim/3 give generally good performance, where dim is the number of variables. * @param maxLeafs the maximum number of leaf nodes in the tree. * @param minSplits the number of minimum elements in a node to split * @param minLeafSize the minimum size of leaf nodes. - * @param order the index of training values in ascending order. Note that only numeric attributes need be sorted. + * @param order the index of training values in ascending order. Note that only numeric + * attributes need be sorted. * @param bags the sample set of instances for stochastic learning. * @param rule the splitting rule. * @param seed @@ -1018,9 +1039,10 @@ public final class DecisionTree implements Classifier<Vector> { } /** - * Returns the variable importance. Every time a split of a node is made on variable the (GINI, information gain, etc.) impurity criterion for the - * two descendent nodes is less than the parent node. 
Adding up the decreases for each individual variable over the tree gives a simple measure of - * variable importance. + * Returns the variable importance. Every time a split of a node is made on variable the (GINI, + * information gain, etc.) impurity criterion for the two descendent nodes is less than the + * parent node. Adding up the decreases for each individual variable over the tree gives a + * simple measure of variable importance. * * @return the variable importance */ @@ -1040,7 +1062,8 @@ public final class DecisionTree implements Classifier<Vector> { } /** - * Predicts the class label of an instance and also calculate a posteriori probabilities. Not supported. + * Predicts the class label of an instance and also calculate a posteriori probabilities. Not + * supported. */ public int predict(Vector x, double[] posteriori) { throw new UnsupportedOperationException("Not supported."); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java b/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java index 59f52d3..6e8a650 100644 --- a/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java +++ b/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java @@ -465,7 +465,8 @@ public final class RandomForestClassifierUDTF extends UDTFWithOptions { @Nonnull private final int[] _y; /** - * The index of training values in ascending order. Note that only numeric attributes will be sorted. + * The index of training values in ascending order. Note that only numeric attributes will + * be sorted. 
*/ @Nonnull private final ColumnMajorIntMatrix _order; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java b/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java index 58151e4..bbf0431 100644 --- a/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java +++ b/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java @@ -407,7 +407,8 @@ public final class RandomForestRegressionUDTF extends UDTFWithOptions { */ private final double[] _y; /** - * The index of training values in ascending order. Note that only numeric attributes will be sorted. + * The index of training values in ascending order. Note that only numeric attributes will + * be sorted. */ private final ColumnMajorIntMatrix _order; /** http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/smile/regression/RegressionTree.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/regression/RegressionTree.java b/core/src/main/java/hivemall/smile/regression/RegressionTree.java index 81b9ba8..5ec27df 100755 --- a/core/src/main/java/hivemall/smile/regression/RegressionTree.java +++ b/core/src/main/java/hivemall/smile/regression/RegressionTree.java @@ -71,30 +71,40 @@ import smile.regression.RandomForest; import smile.regression.Regression; /** - * Decision tree for regression. A decision tree can be learned by splitting the training set into subsets based on an attribute value test. This - * process is repeated on each derived subset in a recursive manner called recursive partitioning. + * Decision tree for regression. 
A decision tree can be learned by splitting the training set into + * subsets based on an attribute value test. This process is repeated on each derived subset in a + * recursive manner called recursive partitioning. * <p> - * Classification and Regression Tree techniques have a number of advantages over many of those alternative techniques. + * Classification and Regression Tree techniques have a number of advantages over many of those + * alternative techniques. * <dl> * <dt>Simple to understand and interpret.</dt> - * <dd>In most cases, the interpretation of results summarized in a tree is very simple. This simplicity is useful not only for purposes of rapid - * classification of new observations, but can also often yield a much simpler "model" for explaining why observations are classified or predicted in - * a particular manner.</dd> + * <dd>In most cases, the interpretation of results summarized in a tree is very simple. This + * simplicity is useful not only for purposes of rapid classification of new observations, but can + * also often yield a much simpler "model" for explaining why observations are classified or + * predicted in a particular manner.</dd> * <dt>Able to handle both numerical and categorical data.</dt> - * <dd>Other techniques are usually specialized in analyzing datasets that have only one type of variable.</dd> + * <dd>Other techniques are usually specialized in analyzing datasets that have only one type of + * variable.</dd> * <dt>Tree methods are nonparametric and nonlinear.</dt> - * <dd>The final results of using tree methods for classification or regression can be summarized in a series of (usually few) logical if-then - * conditions (tree nodes). Therefore, there is no implicit assumption that the underlying relationships between the predictor variables and the - * dependent variable are linear, follow some specific non-linear link function, or that they are even monotonic in nature. 
Thus, tree methods are - * particularly well suited for data mining tasks, where there is often little a priori knowledge nor any coherent set of theories or predictions - * regarding which variables are related and how. In those types of data analytics, tree methods can often reveal simple relationships between just a - * few variables that could have easily gone unnoticed using other analytic techniques.</dd> + * <dd>The final results of using tree methods for classification or regression can be summarized in + * a series of (usually few) logical if-then conditions (tree nodes). Therefore, there is no + * implicit assumption that the underlying relationships between the predictor variables and the + * dependent variable are linear, follow some specific non-linear link function, or that they are + * even monotonic in nature. Thus, tree methods are particularly well suited for data mining tasks, + * where there is often little a priori knowledge nor any coherent set of theories or predictions + * regarding which variables are related and how. In those types of data analytics, tree methods can + * often reveal simple relationships between just a few variables that could have easily gone + * unnoticed using other analytic techniques.</dd> * </dl> - * One major problem with classification and regression trees is their high variance. Often a small change in the data can result in a very different - * series of splits, making interpretation somewhat precarious. Besides, decision-tree learners can create over-complex trees that cause over-fitting. - * Mechanisms such as pruning are necessary to avoid this problem. Another limitation of trees is the lack of smoothness of the prediction surface. + * One major problem with classification and regression trees is their high variance. Often a small + * change in the data can result in a very different series of splits, making interpretation + * somewhat precarious. 
Besides, decision-tree learners can create over-complex trees that cause + * over-fitting. Mechanisms such as pruning are necessary to avoid this problem. Another limitation + * of trees is the lack of smoothness of the prediction surface. * <p> - * Some techniques such as bagging, boosting, and random forest use more than one decision tree for their analysis. + * Some techniques such as bagging, boosting, and random forest use more than one decision tree for + * their analysis. * * @see GradientTreeBoost * @see RandomForest @@ -106,8 +116,9 @@ public final class RegressionTree implements Regression<Vector> { private final Attribute[] _attributes; private final boolean _hasNumericType; /** - * Variable importance. Every time a split of a node is made on variable the impurity criterion for the two descendant nodes is less than the - * parent node. Adding up the decreases for each individual variable over the tree gives a simple measure of variable importance. + * Variable importance. Every time a split of a node is made on variable the impurity criterion + * for the two descendant nodes is less than the parent node. Adding up the decreases for each + * individual variable over the tree gives a simple measure of variable importance. */ private final double[] _importance; /** @@ -119,7 +130,8 @@ public final class RegressionTree implements Regression<Vector> { */ private final int _maxDepth; /** - * The number of instances in a node below which the tree will not split, setting S = 5 generally gives good results. + * The number of instances in a node below which the tree will not split, setting S = 5 + * generally gives good results. */ private final int _minSplit; /** @@ -131,7 +143,8 @@ public final class RegressionTree implements Regression<Vector> { */ private final int _numVars; /** - * The index of training values in ascending order. Note that only numeric attributes will be sorted. + * The index of training values in ascending order. 
Note that only numeric attributes will be + * sorted. */ private final ColumnMajorIntMatrix _order; @@ -140,8 +153,9 @@ public final class RegressionTree implements Regression<Vector> { private final NodeOutput _nodeOutput; /** - * An interface to calculate node output. Note that samples[i] is the number of sampling of dataset[i]. 0 means that the datum is not included and - * values of greater than 1 are possible because of sampling with replacement. + * An interface to calculate node output. Note that samples[i] is the number of sampling of + * dataset[i]. 0 means that the datum is not included and values of greater than 1 are possible + * because of sampling with replacement. */ public interface NodeOutput { /** @@ -489,7 +503,8 @@ public final class RegressionTree implements Regression<Vector> { } /** - * Finds the best attribute to split on at the current node. Returns true if a split exists to reduce squared error, false otherwise. + * Finds the best attribute to split on at the current node. Returns true if a split exists + * to reduce squared error, false otherwise. */ public boolean findBestSplit() { // avoid split if tree depth is larger than threshold @@ -790,11 +805,13 @@ public final class RegressionTree implements Regression<Vector> { * @param attributes the attribute properties. * @param x the training instances. * @param y the response variable. - * @param numVars the number of input variables to pick to split on at each node. It seems that dim/3 give generally good performance, where dim - * is the number of variables. + * @param numVars the number of input variables to pick to split on at each node. It seems that + * dim/3 give generally good performance, where dim is the number of variables. * @param maxLeafs the maximum number of leaf nodes in the tree. - * @param minSplits number of instances in a node below which the tree will not split, setting S = 5 generally gives good results. 
- * @param order the index of training values in ascending order. Note that only numeric attributes need be sorted. + * @param minSplits number of instances in a node below which the tree will not split, setting S + * = 5 generally gives good results. + * @param order the index of training values in ascending order. Note that only numeric + * attributes need be sorted. * @param bags the sample set of instances for stochastic learning. * @param output An interface to calculate node output. */ @@ -895,8 +912,10 @@ public final class RegressionTree implements Regression<Vector> { } /** - * Returns the variable importance. Every time a split of a node is made on variable the impurity criterion for the two descendent nodes is less - * than the parent node. Adding up the decreases for each individual variable over the tree gives a simple measure of variable importance. + * Returns the variable importance. Every time a split of a node is made on variable the + * impurity criterion for the two descendent nodes is less than the parent node. Adding up the + * decreases for each individual variable over the tree gives a simple measure of variable + * importance. 
* * @return the variable importance */ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java index 921272a..dda47fa 100644 --- a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java +++ b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java @@ -25,7 +25,8 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @SuppressWarnings("deprecation") -@Description(name = "l2_norm", +@Description( + name = "l2_norm", value = "_FUNC_(double xi) - Return L2 norm of a vector which has the given values in each dimension") public final class L2NormUDAF extends UDAF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/topicmodel/OnlineLDAModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/topicmodel/OnlineLDAModel.java b/core/src/main/java/hivemall/topicmodel/OnlineLDAModel.java index 6a8d6db..14b6ef4 100644 --- a/core/src/main/java/hivemall/topicmodel/OnlineLDAModel.java +++ b/core/src/main/java/hivemall/topicmodel/OnlineLDAModel.java @@ -348,7 +348,8 @@ public final class OnlineLDAModel extends AbstractProbabilisticTopicModel { } /** - * Estimates the variational bound over all documents using only the documents passed as mini-batch. + * Estimates the variational bound over all documents using only the documents passed as + * mini-batch. 
*/ private double computeApproxBound() { // prepare http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/utils/collections/arrays/DoubleArray.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/arrays/DoubleArray.java b/core/src/main/java/hivemall/utils/collections/arrays/DoubleArray.java index c8f3e17..901b5fd 100644 --- a/core/src/main/java/hivemall/utils/collections/arrays/DoubleArray.java +++ b/core/src/main/java/hivemall/utils/collections/arrays/DoubleArray.java @@ -39,7 +39,7 @@ public interface DoubleArray extends Serializable { @Nonnull public double[] toArray(boolean copy); - + public void clear(); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/utils/collections/sets/IntSet.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/sets/IntSet.java b/core/src/main/java/hivemall/utils/collections/sets/IntSet.java index 398955c..3d9759a 100644 --- a/core/src/main/java/hivemall/utils/collections/sets/IntSet.java +++ b/core/src/main/java/hivemall/utils/collections/sets/IntSet.java @@ -31,7 +31,7 @@ public interface IntSet { public int size(); public void clear(); - + @Nonnull public int[] toArray(boolean copy); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/utils/lang/ArrayUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/lang/ArrayUtils.java b/core/src/main/java/hivemall/utils/lang/ArrayUtils.java index 540f1c6..eb8ef1d 100644 --- a/core/src/main/java/hivemall/utils/lang/ArrayUtils.java +++ b/core/src/main/java/hivemall/utils/lang/ArrayUtils.java @@ -34,8 +34,9 @@ import org.apache.commons.math3.distribution.GammaDistribution; public final class ArrayUtils { /** - * The 
index value when an element is not found in a list or array: <code>-1</code>. This value is returned by methods in this class and can also - * be used in comparisons with values returned by various method from {@link java.util.List}. + * The index value when an element is not found in a list or array: <code>-1</code>. This value + * is returned by methods in this class and can also be used in comparisons with values returned + * by various method from {@link java.util.List}. */ public static final int INDEX_NOT_FOUND = -1; @@ -738,8 +739,7 @@ public final class ArrayUtils { } @Nonnull - public static float[] newRandomFloatArray(@Nonnegative final int size, - @Nonnull final PRNG rnd) { + public static float[] newRandomFloatArray(@Nonnegative final int size, @Nonnull final PRNG rnd) { final float[] ret = new float[size]; for (int i = 0; i < size; i++) { ret[i] = (float) rnd.nextDouble(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/utils/math/MathUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/math/MathUtils.java b/core/src/main/java/hivemall/utils/math/MathUtils.java index 56c4f89..3f41b6f 100644 --- a/core/src/main/java/hivemall/utils/math/MathUtils.java +++ b/core/src/main/java/hivemall/utils/math/MathUtils.java @@ -46,13 +46,13 @@ public final class MathUtils { private MathUtils() {} - /** + /** * @return secant 1 / cos(d) */ public static double sec(final double d) { return 1.d / Math.cos(d); } - + /** * Returns a bit mask for the specified number of bits. */ @@ -100,7 +100,8 @@ public final class MathUtils { } /** - * <a href="https://en.wikipedia.org/wiki/Logit">Logit</a> is the inverse of {@link #sigmoid(double)} function. + * <a href="https://en.wikipedia.org/wiki/Logit">Logit</a> is the inverse of + * {@link #sigmoid(double)} function. 
*/ public static double logit(final double p) { return Math.log(p / (1.d - p)); @@ -111,11 +112,14 @@ public final class MathUtils { } /** - * Returns the inverse erf. This code is based on erfInv() in org.apache.commons.math3.special.Erf. + * Returns the inverse erf. This code is based on erfInv() in + * org.apache.commons.math3.special.Erf. * <p> - * This implementation is described in the paper: <a href="http://people.maths.ox.ac.uk/gilesm/files/gems_erfinv.pdf">Approximating the erfinv - * function</a> by Mike Giles, Oxford-Man Institute of Quantitative Finance, which was published in GPU Computing Gems, volume 2, 2010. The source - * code is available <a href="http://gpucomputing.net/?q=node/1828">here</a>. + * This implementation is described in the paper: <a + * href="http://people.maths.ox.ac.uk/gilesm/files/gems_erfinv.pdf">Approximating the erfinv + * function</a> by Mike Giles, Oxford-Man Institute of Quantitative Finance, which was published + * in GPU Computing Gems, volume 2, 2010. The source code is available <a + * href="http://gpucomputing.net/?q=node/1828">here</a>. 
* </p> * * @param x the value http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/utils/math/MatrixUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/math/MatrixUtils.java b/core/src/main/java/hivemall/utils/math/MatrixUtils.java index a0e5fc7..2fd8193 100644 --- a/core/src/main/java/hivemall/utils/math/MatrixUtils.java +++ b/core/src/main/java/hivemall/utils/math/MatrixUtils.java @@ -48,7 +48,9 @@ public final class MatrixUtils { * R_j = ∑_{i=1}^{k} A_i R_{j-i} where j = 1..k, R_{-i} = R'_i * </pre> * - * @see http://www.emptyloop.com/technotes/a%20tutorial%20on%20linear%20prediction%20and%20levinson-durbin.pdf + * @see <a + * href="http://www.emptyloop.com/technotes/a%20tutorial%20on%20linear%20prediction%20and%20levinson-durbin.pdf">Cedrick + * Collomb: A tutorial on linear prediction and Levinson-Durbin</a> * @param R autocovariance where |R| >= order * @param A coefficient to be solved where |A| >= order + 1 * @return E variance of prediction error @@ -510,7 +512,8 @@ public final class MatrixUtils { /** * Find the first singular vector/value of a matrix A based on the Power method. * - * @see http://www.cs.yale.edu/homes/el327/datamining2013aFiles/07_singular_value_decomposition.pdf + * @see http + * ://www.cs.yale.edu/homes/el327/datamining2013aFiles/07_singular_value_decomposition.pdf * @param A target matrix * @param x0 initial vector * @param nIter number of iterations for the Power method @@ -650,7 +653,8 @@ public final class MatrixUtils { * Find eigenvalues and eigenvectors of given tridiagonal matrix T.
* * @see http://web.csulb.edu/~tgao/math423/s94.pdf - * @see http://stats.stackexchange.com/questions/20643/finding-matrix-eigenvectors-using-qr-decomposition + * @see http://stats.stackexchange.com/questions/20643/finding-matrix-eigenvectors-using-qr- + * decomposition * @param T target tridiagonal matrix * @param nIter number of iterations for the QR method * @param eigvals eigenvalues are stored here http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/main/java/hivemall/utils/sampling/IntReservoirSampler.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/sampling/IntReservoirSampler.java b/core/src/main/java/hivemall/utils/sampling/IntReservoirSampler.java index f86a788..67b5816 100644 --- a/core/src/main/java/hivemall/utils/sampling/IntReservoirSampler.java +++ b/core/src/main/java/hivemall/utils/sampling/IntReservoirSampler.java @@ -24,7 +24,8 @@ import java.util.Random; import javax.annotation.Nonnull; /** - * Vitter's reservoir sampling implementation that randomly chooses k items from a list containing n items. + * Vitter's reservoir sampling implementation that randomly chooses k items from a list containing n + * items. 
* * @link http://en.wikipedia.org/wiki/Reservoir_sampling * @link http://portal.acm.org/citation.cfm?id=3165 http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/test/java/hivemall/anomaly/ChangeFinder2DTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/anomaly/ChangeFinder2DTest.java b/core/src/test/java/hivemall/anomaly/ChangeFinder2DTest.java index 43a0921..5c0409b 100644 --- a/core/src/test/java/hivemall/anomaly/ChangeFinder2DTest.java +++ b/core/src/test/java/hivemall/anomaly/ChangeFinder2DTest.java @@ -94,7 +94,7 @@ public class ChangeFinder2DTest { params.r1 = 0.01d; params.k = 6; params.T1 = 10; - params.T2 = 5; + params.T2 = 5; PrimitiveObjectInspector oi = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector; ListObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(oi); ChangeFinder2D cf = new ChangeFinder2D(params, listOI); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/test/java/hivemall/evaluation/BinaryResponsesMeasuresTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/evaluation/BinaryResponsesMeasuresTest.java b/core/src/test/java/hivemall/evaluation/BinaryResponsesMeasuresTest.java index b78bf0e..9f8a04e 100644 --- a/core/src/test/java/hivemall/evaluation/BinaryResponsesMeasuresTest.java +++ b/core/src/test/java/hivemall/evaluation/BinaryResponsesMeasuresTest.java @@ -56,7 +56,8 @@ public class BinaryResponsesMeasuresTest { List<Integer> rankedList = Arrays.asList(1, 3, 2, 6); List<Integer> groundTruth = Arrays.asList(1, 2, 4); - double actual = BinaryResponsesMeasures.Precision(rankedList, groundTruth, rankedList.size()); + double actual = BinaryResponsesMeasures.Precision(rankedList, groundTruth, + rankedList.size()); Assert.assertEquals(0.5d, actual, 0.0001d); actual = BinaryResponsesMeasures.Precision(rankedList, 
groundTruth, 2); @@ -105,12 +106,14 @@ public class BinaryResponsesMeasuresTest { // meaningless case I: all TPs List<Integer> groundTruthAllTruePositive = Arrays.asList(1, 3, 2, 6); - actual = BinaryResponsesMeasures.AUC(rankedList, groundTruthAllTruePositive, rankedList.size()); + actual = BinaryResponsesMeasures.AUC(rankedList, groundTruthAllTruePositive, + rankedList.size()); Assert.assertEquals(0.5d, actual, 0.0001d); // meaningless case II: all FPs List<Integer> groundTruthAllFalsePositive = Arrays.asList(7, 8, 9, 10); - actual = BinaryResponsesMeasures.AUC(rankedList, groundTruthAllFalsePositive, rankedList.size()); + actual = BinaryResponsesMeasures.AUC(rankedList, groundTruthAllFalsePositive, + rankedList.size()); Assert.assertEquals(0.5d, actual, 0.0001d); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/test/java/hivemall/evaluation/GradedResponsesMeasuresTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/evaluation/GradedResponsesMeasuresTest.java b/core/src/test/java/hivemall/evaluation/GradedResponsesMeasuresTest.java index 765fa76..6a7cc9d 100644 --- a/core/src/test/java/hivemall/evaluation/GradedResponsesMeasuresTest.java +++ b/core/src/test/java/hivemall/evaluation/GradedResponsesMeasuresTest.java @@ -31,7 +31,8 @@ public class GradedResponsesMeasuresTest { List<Double> recommendTopRelScoreList = Arrays.asList(5.0, 2.0, 4.0, 1.0, 3.0); List<Double> truthTopRelScoreList = Arrays.asList(5.0, 4.0, 3.0); - double actual = GradedResponsesMeasures.nDCG(recommendTopRelScoreList, truthTopRelScoreList, 3); + double actual = GradedResponsesMeasures.nDCG(recommendTopRelScoreList, + truthTopRelScoreList, 3); Assert.assertEquals(0.918770780535d, actual, 0.0001d); actual = GradedResponsesMeasures.nDCG(recommendTopRelScoreList, truthTopRelScoreList, 2); 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java b/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java index 3bf90ec..898b4cb 100644 --- a/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java +++ b/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java @@ -324,7 +324,8 @@ public class DIMSUMMapperUDTFTest { for (String k : items.keySet()) { final Double sims_jk = sims_j.get(k); if (sims_jk != null) { - float simsExact_jk = CosineSimilarityUDF.cosineSimilarity(item_j, items.get(k)); + float simsExact_jk = CosineSimilarityUDF.cosineSimilarity(item_j, + items.get(k)); Assert.assertEquals(simsExact_jk, sims_jk.floatValue(), 1e-6); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/core/src/test/java/hivemall/utils/codec/DeflateCodecTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/codec/DeflateCodecTest.java b/core/src/test/java/hivemall/utils/codec/DeflateCodecTest.java index 06d1e2e..091644e 100644 --- a/core/src/test/java/hivemall/utils/codec/DeflateCodecTest.java +++ b/core/src/test/java/hivemall/utils/codec/DeflateCodecTest.java @@ -33,7 +33,8 @@ public class DeflateCodecTest { @Test public void testOpscript() throws IOException { - URL url = new URL("https://raw.githubusercontent.com/apache/incubator-hivemall/master/core/pom.xml"); + URL url = new URL( + "https://raw.githubusercontent.com/apache/incubator-hivemall/master/core/pom.xml"); InputStream is = new BufferedInputStream(url.openStream()); String opScript = IOUtils.toString(is); byte[] original1 = opScript.getBytes(); 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/mixserv/src/test/java/hivemall/mix/server/MixServerTest.java ---------------------------------------------------------------------- diff --git a/mixserv/src/test/java/hivemall/mix/server/MixServerTest.java b/mixserv/src/test/java/hivemall/mix/server/MixServerTest.java index 2b475c1..f6db5f8 100644 --- a/mixserv/src/test/java/hivemall/mix/server/MixServerTest.java +++ b/mixserv/src/test/java/hivemall/mix/server/MixServerTest.java @@ -296,8 +296,8 @@ public class MixServerTest extends HivemallTestBase { serverExec.shutdown(); } - private static void invokeClient01(String groupId, int serverPort, boolean denseModel, boolean cancelMix) - throws InterruptedException { + private static void invokeClient01(String groupId, int serverPort, boolean denseModel, + boolean cancelMix) throws InterruptedException { PredictionModel model = denseModel ? new NewDenseModel(100) : new NewSparseModel(100, false); model.configureClock(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java ---------------------------------------------------------------------- diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java index a016c7e..39d4821 100644 --- a/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java +++ b/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java @@ -43,96 +43,92 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; @Description(name = "tokenize_cn", value = "_FUNC_(String line [, const list<string> stopWords])" - + " - returns tokenized strings in array<string>") + + " - returns tokenized strings in array<string>") @UDFType(deterministic = true, stateful = false) public final class SmartcnUDF extends GenericUDF { - private String[] _stopWordsArray; - - private transient 
SmartChineseAnalyzer _analyzer; - - @Override - public ObjectInspector initialize(ObjectInspector[] arguments) - throws UDFArgumentException { - final int arglen = arguments.length; - if (arglen < 1 || arglen > 2) { - throw new UDFArgumentException( - "Invalid number of arguments for `tokenize_cn`: " + arglen); - } - - this._stopWordsArray = (arglen >= 2) ? HiveUtils - .getConstStringArray(arguments[1]) : null; - this._analyzer = null; - - return ObjectInspectorFactory - .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - } - - @Override - public List<Text> evaluate(DeferredObject[] arguments) throws HiveException { - SmartChineseAnalyzer analyzer = _analyzer; - if (analyzer == null) { - CharArraySet stopwords = stopWords(_stopWordsArray); - analyzer = new SmartChineseAnalyzer(stopwords); - this._analyzer = analyzer; - } - - Object arg0 = arguments[0].get(); - if (arg0 == null) { - return null; - } - String line = arg0.toString(); - - final List<Text> results = new ArrayList<Text>(32); - TokenStream stream = null; - try { - stream = analyzer.tokenStream("", line); - if (stream != null) { - analyzeTokens(stream, results); - } - } catch (IOException e) { - IOUtils.closeQuietly(analyzer); - throw new HiveException(e); - } finally { - IOUtils.closeQuietly(stream); - } - return results; - } - - @Override - public void close() throws IOException { - IOUtils.closeQuietly(_analyzer); - } - - @Nonnull - private static CharArraySet stopWords(@Nonnull final String[] array) - throws UDFArgumentException { - if (array == null) { - return SmartChineseAnalyzer.getDefaultStopSet(); - } - if (array.length == 0) { - return CharArraySet.EMPTY_SET; - } - CharArraySet results = new CharArraySet(Arrays.asList(array), /* ignoreCase */ - true); - return results; - } - - private static void analyzeTokens(@Nonnull TokenStream stream, - @Nonnull List<Text> results) throws IOException { - // instantiate an attribute placeholder once - 
CharTermAttribute termAttr = stream - .getAttribute(CharTermAttribute.class); - stream.reset(); - - while (stream.incrementToken()) { - String term = termAttr.toString(); - results.add(new Text(term)); - } - } - - @Override - public String getDisplayString(String[] children) { - return "tokenize_cn(" + Arrays.toString(children) + ')'; - } + private String[] _stopWordsArray; + + private transient SmartChineseAnalyzer _analyzer; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + final int arglen = arguments.length; + if (arglen < 1 || arglen > 2) { + throw new UDFArgumentException("Invalid number of arguments for `tokenize_cn`: " + + arglen); + } + + this._stopWordsArray = (arglen >= 2) ? HiveUtils.getConstStringArray(arguments[1]) : null; + this._analyzer = null; + + return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + } + + @Override + public List<Text> evaluate(DeferredObject[] arguments) throws HiveException { + SmartChineseAnalyzer analyzer = _analyzer; + if (analyzer == null) { + CharArraySet stopwords = stopWords(_stopWordsArray); + analyzer = new SmartChineseAnalyzer(stopwords); + this._analyzer = analyzer; + } + + Object arg0 = arguments[0].get(); + if (arg0 == null) { + return null; + } + String line = arg0.toString(); + + final List<Text> results = new ArrayList<Text>(32); + TokenStream stream = null; + try { + stream = analyzer.tokenStream("", line); + if (stream != null) { + analyzeTokens(stream, results); + } + } catch (IOException e) { + IOUtils.closeQuietly(analyzer); + throw new HiveException(e); + } finally { + IOUtils.closeQuietly(stream); + } + return results; + } + + @Override + public void close() throws IOException { + IOUtils.closeQuietly(_analyzer); + } + + @Nonnull + private static CharArraySet stopWords(@Nonnull final String[] array) + throws UDFArgumentException { + if (array == null) { + return 
SmartChineseAnalyzer.getDefaultStopSet(); + } + if (array.length == 0) { + return CharArraySet.EMPTY_SET; + } + CharArraySet results = new CharArraySet(Arrays.asList(array), /* ignoreCase */ + true); + return results; + } + + private static void analyzeTokens(@Nonnull TokenStream stream, @Nonnull List<Text> results) + throws IOException { + // instantiate an attribute placeholder once + CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class); + stream.reset(); + + while (stream.incrementToken()) { + String term = termAttr.toString(); + results.add(new Text(term)); + } + } + + @Override + public String getDisplayString(String[] children) { + return "tokenize_cn(" + Arrays.toString(children) + ')'; + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java ---------------------------------------------------------------------- diff --git a/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java b/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java index 67c2283..83ccb0c 100644 --- a/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java +++ b/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java @@ -34,52 +34,48 @@ import org.junit.Test; public class SmartcnUDFTest { - @Test - public void testOneArgument() throws UDFArgumentException, IOException { - GenericUDF udf = new SmartcnUDF(); - ObjectInspector[] argOIs = new ObjectInspector[1]; - // line - argOIs[0] = PrimitiveObjectInspectorFactory.javaStringObjectInspector; - udf.initialize(argOIs); - udf.close(); - } + @Test + public void testOneArgument() throws UDFArgumentException, IOException { + GenericUDF udf = new SmartcnUDF(); + ObjectInspector[] argOIs = new ObjectInspector[1]; + // line + argOIs[0] = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + udf.initialize(argOIs); + udf.close(); + } - @Test - public void testTwoArgument() throws UDFArgumentException, IOException { - 
GenericUDF udf = new SmartcnUDF(); - ObjectInspector[] argOIs = new ObjectInspector[2]; - // line - argOIs[0] = PrimitiveObjectInspectorFactory.javaStringObjectInspector; - // stopWords - argOIs[1] = ObjectInspectorFactory - .getStandardConstantListObjectInspector( - PrimitiveObjectInspectorFactory.javaStringObjectInspector, - null); - udf.initialize(argOIs); - udf.close(); - } + @Test + public void testTwoArgument() throws UDFArgumentException, IOException { + GenericUDF udf = new SmartcnUDF(); + ObjectInspector[] argOIs = new ObjectInspector[2]; + // line + argOIs[0] = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + // stopWords + argOIs[1] = ObjectInspectorFactory.getStandardConstantListObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, null); + udf.initialize(argOIs); + udf.close(); + } - @Test - public void testEvaluateOneRow() throws IOException, HiveException { - SmartcnUDF udf = new SmartcnUDF(); - ObjectInspector[] argOIs = new ObjectInspector[1]; - // line - argOIs[0] = PrimitiveObjectInspectorFactory.writableStringObjectInspector; - udf.initialize(argOIs); + @Test + public void testEvaluateOneRow() throws IOException, HiveException { + SmartcnUDF udf = new SmartcnUDF(); + ObjectInspector[] argOIs = new ObjectInspector[1]; + // line + argOIs[0] = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + udf.initialize(argOIs); - DeferredObject[] args = new DeferredObject[1]; - args[0] = new DeferredObject() { - public Text get() throws HiveException { - return new Text( - "Smartcn为Apache2.0åè®®ç弿ºä¸æåè¯ç³»ç»ï¼Javaè¯è¨ç¼åï¼ä¿®æ¹çä¸ç§é¢è®¡ç®æICTCLASåè¯ç³»ç»ã"); - } + DeferredObject[] args = new DeferredObject[1]; + args[0] = new DeferredObject() { + public Text get() throws HiveException { + return new Text("Smartcn为Apache2.0åè®®ç弿ºä¸æåè¯ç³»ç»ï¼Javaè¯è¨ç¼åï¼ä¿®æ¹çä¸ç§é¢è®¡ç®æICTCLASåè¯ç³»ç»ã"); + } - @Override - public void prepare(int arg) throws HiveException { - } - }; - List<Text> tokens = 
udf.evaluate(args); - Assert.assertNotNull(tokens); - udf.close(); - } + @Override + public void prepare(int arg) throws HiveException {} + }; + List<Text> tokens = udf.evaluate(args); + Assert.assertNotNull(tokens); + udf.close(); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/resources/eclipse-style.xml ---------------------------------------------------------------------- diff --git a/resources/eclipse-style.xml b/resources/eclipse-style.xml index 8cb5eb5..a26e895 100644 --- a/resources/eclipse-style.xml +++ b/resources/eclipse-style.xml @@ -64,7 +64,7 @@ <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/> -<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="150"/> +<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="100"/> <setting id="org.eclipse.jdt.core.formatter.use_on_off_tags" value="false"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9c66f0a/xgboost/src/main/java/hivemall/xgboost/NativeLibLoader.java ---------------------------------------------------------------------- diff --git a/xgboost/src/main/java/hivemall/xgboost/NativeLibLoader.java b/xgboost/src/main/java/hivemall/xgboost/NativeLibLoader.java index 82347bf..63a5217 100644 --- a/xgboost/src/main/java/hivemall/xgboost/NativeLibLoader.java +++ b/xgboost/src/main/java/hivemall/xgboost/NativeLibLoader.java @@ -36,11 +36,11 @@ public final class NativeLibLoader { // Try to 
load a native library if it exists public static synchronized void initXGBoost() { - if(!initialized) { + if (!initialized) { // Since a user-defined native library has a top priority, // we first check if it is defined or not. final String userDefinedLib = System.getProperty(keyUserDefinedLib); - if(userDefinedLib == null) { + if (userDefinedLib == null) { tryLoadNativeLibFromResource("xgboost4j"); } else { tryLoadNativeLib(userDefinedLib); @@ -61,20 +61,20 @@ public final class NativeLibLoader { // Resolve the library file name with a suffix (e.g., dll, .so, etc.) String resolvedLibName = System.mapLibraryName(libName); - if(!hasResource(libPath + resolvedLibName)) { - if(!getOSName().equals("Mac")) { + if (!hasResource(libPath + resolvedLibName)) { + if (!getOSName().equals("Mac")) { return; } // Fix for openjdk7 for Mac // A detail of this workaround can be found in https://github.com/xerial/snappy-java/issues/6 resolvedLibName = "lib" + libName + ".jnilib"; - if(hasResource(libPath + resolvedLibName)) { + if (hasResource(libPath + resolvedLibName)) { return; } } try { - final File tempFile = createTempFileFromResource( - resolvedLibName, NativeLibLoader.class.getResourceAsStream(libPath + resolvedLibName)); + final File tempFile = createTempFileFromResource(resolvedLibName, + NativeLibLoader.class.getResourceAsStream(libPath + resolvedLibName)); logger.info("Copyed the native library in JAR as " + tempFile.getAbsolutePath()); addLibraryPath(tempFile.getParent()); } catch (Exception e) { @@ -85,15 +85,14 @@ public final class NativeLibLoader { private static void tryLoadNativeLib(final String userDefinedLib) { final File userDefinedLibFile = new File(userDefinedLib); - if(!userDefinedLibFile.exists()) { + if (!userDefinedLibFile.exists()) { logger.warn(userDefinedLib + " not found"); } else { try { - final File tempFile = createTempFileFromResource( - userDefinedLibFile.getName(), - new FileInputStream(userDefinedLibFile.getAbsolutePath()) - ); - 
logger.info("Copyed the user-defined native library as " + tempFile.getAbsolutePath()); + final File tempFile = createTempFileFromResource(userDefinedLibFile.getName(), + new FileInputStream(userDefinedLibFile.getAbsolutePath())); + logger.info("Copyed the user-defined native library as " + + tempFile.getAbsolutePath()); addLibraryPath(tempFile.getParent()); } catch (Exception e) { // Simply ignore it here @@ -104,7 +103,7 @@ public final class NativeLibLoader { private static String getPreffix(@Nonnull String fileName) { int point = fileName.lastIndexOf("."); - if(point != -1) { + if (point != -1) { return fileName.substring(0, point); } return fileName; @@ -119,17 +118,15 @@ public final class NativeLibLoader { * @throws IOException * @throws IllegalArgumentException */ - static File createTempFileFromResource(String libName, InputStream is) - throws IOException, IllegalArgumentException { + static File createTempFileFromResource(String libName, InputStream is) throws IOException, + IllegalArgumentException { // Create a temporary folder with a random number for the native lib final String uuid = UUID.randomUUID().toString(); - final File tempFolder = new File( - System.getProperty("java.io.tmpdir"), - String.format("%s-%s", getPreffix(libName), uuid) - ); - if(!tempFolder.exists()) { + final File tempFolder = new File(System.getProperty("java.io.tmpdir"), String.format( + "%s-%s", getPreffix(libName), uuid)); + if (!tempFolder.exists()) { boolean created = tempFolder.mkdirs(); - if(!created) { + if (!created) { throw new IOException("Failed to create a temporary folder for the native lib"); } } @@ -142,7 +139,7 @@ public final class NativeLibLoader { File extractedLibFile = new File(tempFolder.getAbsolutePath(), libName); final OutputStream os = new FileOutputStream(extractedLibFile); try { - while((readBytes = is.read(buffer)) != -1) { + while ((readBytes = is.read(buffer)) != -1) { os.write(buffer, 0, readBytes); } } finally { @@ -165,7 +162,7 @@ public final 
class NativeLibLoader { field.setAccessible(true); final String[] paths = (String[]) field.get(null); for (String path : paths) { - if(libPath.equals(path)) { + if (libPath.equals(path)) { return; } }
