http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java b/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java index 71eeca2..b2f5e9e 100644 --- a/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java +++ b/core/src/main/java/hivemall/smile/classification/RandomForestClassifierUDTF.java @@ -80,8 +80,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.Counters.Counter; import org.apache.hadoop.mapred.Reporter; -@Description( - name = "train_randomforest_classifier", +@Description(name = "train_randomforest_classifier", value = "_FUNC_(array<double|string> features, int label [, const string options, const array<double> classWeights])" + "- Returns a relation consists of " + "<string model_id, double model_weight, string model, array<double> var_importance, int oob_errors, int oob_tests>") @@ -133,10 +132,7 @@ public final class RandomForestClassifierUDTF extends UDTFWithOptions { Options opts = new Options(); opts.addOption("trees", "num_trees", true, "The number of trees for each task [default: 50]"); - opts.addOption( - "vars", - "num_variables", - true, + opts.addOption("vars", "num_variables", true, "The number of random selected features [default: ceil(sqrt(x[0].length))]." + " int(num_variables * x[0].length) is considered if num_variable is (0.0,1.0]"); opts.addOption("depth", "max_depth", true, @@ -183,8 +179,8 @@ public final class RandomForestClassifierUDTF extends UDTFWithOptions { maxDepth = Primitives.parseInt(cl.getOptionValue("max_depth"), maxDepth); numLeafs = Primitives.parseInt(cl.getOptionValue("max_leaf_nodes"), numLeafs); minSplits = Primitives.parseInt(cl.getOptionValue("min_split"), minSplits); - minSamplesLeaf = Primitives.parseInt(cl.getOptionValue("min_samples_leaf"), - minSamplesLeaf); + minSamplesLeaf = + Primitives.parseInt(cl.getOptionValue("min_samples_leaf"), minSamplesLeaf); seed = Primitives.parseLong(cl.getOptionValue("seed"), seed); attrs = SmileExtUtils.resolveAttributes(cl.getOptionValue("attribute_types")); splitRule = SmileExtUtils.resolveSplitRule(cl.getOptionValue("split_rule", "GINI")); @@ -247,7 +243,8 @@ public final class RandomForestClassifierUDTF extends UDTFWithOptions { this.matrixBuilder = new CSRMatrixBuilder(8192); } else { throw new UDFArgumentException( - "_FUNC_ takes double[] or string[] for the first argument: " + listOI.getTypeName()); + "_FUNC_ takes double[] or string[] for the first argument: " + + listOI.getTypeName()); } this.labelOI = HiveUtils.asIntCompatibleOI(argOIs[1]); @@ -266,7 +263,8 @@ public final class RandomForestClassifierUDTF extends UDTFWithOptions { fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); fieldNames.add("var_importance"); if (denseInput) { - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); } else { fieldOIs.add(ObjectInspectorFactory.getStandardMapObjectInspector( PrimitiveObjectInspectorFactory.writableIntObjectInspector, @@ -360,8 +358,8 @@ public final class RandomForestClassifierUDTF extends UDTFWithOptions { private void train(@Nonnull Matrix x, @Nonnull final int[] y) throws HiveException { final int numExamples = x.numRows(); if (numExamples != y.length) { - throw new HiveException(String.format("The sizes of X and Y don't match: %d != %d", - numExamples, y.length)); + throw new HiveException( + String.format("The sizes of X and Y don't match: %d != %d", numExamples, y.length)); } checkOptions(); @@ -430,8 +428,8 @@ public final class RandomForestClassifierUDTF extends UDTFWithOptions { if (denseInput) { forwardObjs[3] = WritableUtils.toWritableList(importance.toArray()); } else { - final Map<IntWritable, DoubleWritable> map = new HashMap<IntWritable, DoubleWritable>( - importance.size()); + final Map<IntWritable, DoubleWritable> map = + new HashMap<IntWritable, DoubleWritable>(importance.size()); importance.each(new VectorProcedure() { public void apply(int i, double value) { map.put(new IntWritable(i), new DoubleWritable(value)); @@ -591,8 +589,8 @@ public final class RandomForestClassifierUDTF extends UDTFWithOptions { if (subsample != 1.0d) { nj = (int) Math.round(nj * subsample); } - final int size = (_udtf._classWeight == null) ? nj : (int) Math.round(nj - * _udtf._classWeight[l]); + final int size = (_udtf._classWeight == null) ? nj + : (int) Math.round(nj * _udtf._classWeight[l]); for (int j = 0; j < size; j++) { int xi = rnd.nextInt(nj); int index = cj.get(xi);
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java b/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java index f3f4d4f..df5d55b 100644 --- a/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java +++ b/core/src/main/java/hivemall/smile/regression/RandomForestRegressionUDTF.java @@ -71,8 +71,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.Counters.Counter; import org.apache.hadoop.mapred.Reporter; -@Description( - name = "train_randomforest_regression", +@Description(name = "train_randomforest_regression", value = "_FUNC_(array<double|string> features, double target [, string options]) - " + "Returns a relation consists of " + "<int model_id, int model_type, string pred_model, array<double> var_importance, int oob_errors, int oob_tests>") @@ -121,10 +120,7 @@ public final class RandomForestRegressionUDTF extends UDTFWithOptions { Options opts = new Options(); opts.addOption("trees", "num_trees", true, "The number of trees for each task [default: 50]"); - opts.addOption( - "vars", - "num_variables", - true, + opts.addOption("vars", "num_variables", true, "The number of random selected features [default: ceil(sqrt(x[0].length))]." + " int(num_variables * x[0].length) is considered if num_variable is (0.0,1.0]"); opts.addOption("depth", "max_depth", true, @@ -162,8 +158,8 @@ public final class RandomForestRegressionUDTF extends UDTFWithOptions { maxDepth = Primitives.parseInt(cl.getOptionValue("max_depth"), maxDepth); maxLeafs = Primitives.parseInt(cl.getOptionValue("max_leaf_nodes"), maxLeafs); minSplit = Primitives.parseInt(cl.getOptionValue("min_split"), minSplit); - minSamplesLeaf = Primitives.parseInt(cl.getOptionValue("min_samples_leaf"), - minSamplesLeaf); + minSamplesLeaf = + Primitives.parseInt(cl.getOptionValue("min_samples_leaf"), minSamplesLeaf); seed = Primitives.parseLong(cl.getOptionValue("seed"), seed); attrs = SmileExtUtils.resolveAttributes(cl.getOptionValue("attribute_types")); } @@ -183,10 +179,9 @@ public final class RandomForestRegressionUDTF extends UDTFWithOptions { @Override public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 2 && argOIs.length != 3) { - throw new UDFArgumentException( - getClass().getSimpleName() - + " takes 2 or 3 arguments: array<double|string> features, double target [, const string options]: " - + argOIs.length); + throw new UDFArgumentException(getClass().getSimpleName() + + " takes 2 or 3 arguments: array<double|string> features, double target [, const string options]: " + + argOIs.length); } ListObjectInspector listOI = HiveUtils.asListOI(argOIs[0]); @@ -202,7 +197,8 @@ public final class RandomForestRegressionUDTF extends UDTFWithOptions { this.matrixBuilder = new CSRMatrixBuilder(8192); } else { throw new UDFArgumentException( - "_FUNC_ takes double[] or string[] for the first argument: " + listOI.getTypeName()); + "_FUNC_ takes double[] or string[] for the first argument: " + + listOI.getTypeName()); } this.targetOI = HiveUtils.asDoubleCompatibleOI(argOIs[1]); @@ -220,7 +216,8 @@ public final class RandomForestRegressionUDTF extends UDTFWithOptions { fieldNames.add("pred_model"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); fieldNames.add("var_importance"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); fieldNames.add("oob_errors"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); fieldNames.add("oob_tests"); @@ -316,8 +313,8 @@ public final class RandomForestRegressionUDTF extends UDTFWithOptions { private void train(@Nonnull Matrix x, @Nonnull final double[] y) throws HiveException { final int numExamples = x.numRows(); if (numExamples != y.length) { - throw new HiveException(String.format("The sizes of X and Y don't match: %d != %d", - numExamples, y.length)); + throw new HiveException( + String.format("The sizes of X and Y don't match: %d != %d", numExamples, y.length)); } checkOptions(); @@ -433,8 +430,8 @@ public final class RandomForestRegressionUDTF extends UDTFWithOptions { private final AtomicInteger _remainingTasks; TrainingTask(RandomForestRegressionUDTF udtf, int taskId, Attribute[] attributes, Matrix x, - double[] y, int numVars, ColumnMajorIntMatrix order, double[] prediction, - int[] oob, long seed, AtomicInteger remainingTasks) { + double[] y, int numVars, ColumnMajorIntMatrix order, double[] prediction, int[] oob, + long seed, AtomicInteger remainingTasks) { this._udtf = udtf; this._taskId = taskId; this._attributes = attributes; @@ -466,9 +463,9 @@ public final class RandomForestRegressionUDTF extends UDTFWithOptions { } StopWatch stopwatch = new StopWatch(); - RegressionTree tree = new RegressionTree(_attributes, _x, _y, _numVars, - _udtf._maxDepth, _udtf._maxLeafNodes, _udtf._minSamplesSplit, - _udtf._minSamplesLeaf, _order, bags, rnd2); + RegressionTree tree = new RegressionTree(_attributes, _x, _y, _numVars, _udtf._maxDepth, + _udtf._maxLeafNodes, _udtf._minSamplesSplit, _udtf._minSamplesLeaf, _order, bags, + rnd2); incrCounter(_udtf._treeConstructionTimeCounter, stopwatch.elapsed(TimeUnit.SECONDS)); // out-of-bag prediction http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/regression/RegressionTree.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/regression/RegressionTree.java b/core/src/main/java/hivemall/smile/regression/RegressionTree.java index 67535c6..b8a3cc7 100755 --- a/core/src/main/java/hivemall/smile/regression/RegressionTree.java +++ b/core/src/main/java/hivemall/smile/regression/RegressionTree.java @@ -230,8 +230,8 @@ public final class RegressionTree implements Regression<Vector> { return falseChild.predict(x); } } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } } } @@ -298,8 +298,8 @@ public final class RegressionTree implements Regression<Vector> { indent(builder, depth); builder.append("}\n"); } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } } } @@ -318,34 +318,40 @@ public final class RegressionTree implements Regression<Vector> { builder.append(' ').append(parentNodeId).append(" -> ").append(myNodeId); if (parentNodeId == 0) { if (myNodeId == 1) { - builder.append(" [labeldistance=2.5, labelangle=45, headlabel=\"True\"]"); + builder.append( + " [labeldistance=2.5, labelangle=45, headlabel=\"True\"]"); } else { - builder.append(" [labeldistance=2.5, labelangle=-45, headlabel=\"False\"]"); + builder.append( + " [labeldistance=2.5, labelangle=-45, headlabel=\"False\"]"); } } builder.append(";\n"); } } else { if (splitFeatureType == AttributeType.NOMINAL) { - builder.append(String.format( - " %d [label=<%s = %s>, fillcolor=\"#00000000\"];\n", myNodeId, - resolveFeatureName(splitFeature, featureNames), Double.toString(splitValue))); + builder.append( + String.format(" %d [label=<%s = %s>, fillcolor=\"#00000000\"];\n", myNodeId, + resolveFeatureName(splitFeature, featureNames), + Double.toString(splitValue))); } else if (splitFeatureType == AttributeType.NUMERIC) { - builder.append(String.format( - " %d [label=<%s ≤ %s>, fillcolor=\"#00000000\"];\n", myNodeId, - resolveFeatureName(splitFeature, featureNames), Double.toString(splitValue))); + builder.append( + String.format(" %d [label=<%s ≤ %s>, fillcolor=\"#00000000\"];\n", + myNodeId, resolveFeatureName(splitFeature, featureNames), + Double.toString(splitValue))); } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } if (myNodeId != parentNodeId) { builder.append(' ').append(parentNodeId).append(" -> ").append(myNodeId); if (parentNodeId == 0) {//only draw edge label on top if (myNodeId == 1) { - builder.append(" [labeldistance=2.5, labelangle=45, headlabel=\"True\"]"); + builder.append( + " [labeldistance=2.5, labelangle=45, headlabel=\"True\"]"); } else { - builder.append(" [labeldistance=2.5, labelangle=-45, headlabel=\"False\"]"); + builder.append( + " [labeldistance=2.5, labelangle=-45, headlabel=\"False\"]"); } } builder.append(";\n"); @@ -405,8 +411,8 @@ public final class RegressionTree implements Regression<Vector> { int falseDepth = falseChild.opCodegen(scripts, depth + trueDepth); selfDepth += falseDepth; } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } } return selfDepth; @@ -559,8 +565,8 @@ public final class RegressionTree implements Regression<Vector> { // Loop through features and compute the reduction of squared error, // which is trueCount * trueMean^2 + falseCount * falseMean^2 - count * parentMean^2 - final int[] samples = _hasNumericType ? SmileExtUtils.bagsToSamples(bags, x.numRows()) - : null; + final int[] samples = + _hasNumericType ? SmileExtUtils.bagsToSamples(bags, x.numRows()) : null; for (int varJ : variableIndex(x, bags)) { final Node split = findBestSplit(numSamples, sum, varJ, samples); if (split.splitScore > node.splitScore) { @@ -644,8 +650,8 @@ public final class RegressionTree implements Regression<Vector> { final double trueMean = trueSum[k] / tc; final double falseMean = (sum - trueSum[k]) / fc; - final double gain = (tc * trueMean * trueMean + fc * falseMean * falseMean) - n - * split.output * split.output; + final double gain = (tc * trueMean * trueMean + fc * falseMean * falseMean) + - n * split.output * split.output; if (gain > split.splitScore) { // new best split split.splitFeature = j; @@ -698,8 +704,8 @@ public final class RegressionTree implements Regression<Vector> { // The gain is actually -(reduction in squared error) for // sorting in priority queue, which treats smaller number with // higher priority. - final double gain = (trueCount * trueMean * trueMean + falseCount - * falseMean * falseMean) + final double gain = (trueCount * trueMean * trueMean + + falseCount * falseMean * falseMean) - n * split.output * split.output; if (gain > split.splitScore) { // new best split @@ -718,8 +724,8 @@ public final class RegressionTree implements Regression<Vector> { }); } else { - throw new IllegalStateException("Unsupported attribute type: " - + _attributes[j].type); + throw new IllegalStateException( + "Unsupported attribute type: " + _attributes[j].type); } return split; @@ -812,8 +818,8 @@ public final class RegressionTree implements Regression<Vector> { } } } else { - throw new IllegalStateException("Unsupported attribute type: " - + node.splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + node.splitFeatureType); } return tc; } @@ -854,14 +860,14 @@ public final class RegressionTree implements Regression<Vector> { */ public RegressionTree(@Nullable Attribute[] attributes, @Nonnull Matrix x, @Nonnull double[] y, int numVars, int maxDepth, int maxLeafs, int minSplits, int minLeafSize, - @Nullable ColumnMajorIntMatrix order, @Nullable int[] bags, - @Nullable NodeOutput output, @Nullable PRNG rand) { + @Nullable ColumnMajorIntMatrix order, @Nullable int[] bags, @Nullable NodeOutput output, + @Nullable PRNG rand) { checkArgument(x, y, numVars, maxDepth, maxLeafs, minSplits, minLeafSize); this._attributes = SmileExtUtils.attributeTypes(attributes, x); if (_attributes.length != x.numColumns()) { - throw new IllegalArgumentException("-attrs option is invalid: " - + Arrays.toString(attributes)); + throw new IllegalArgumentException( + "-attrs option is invalid: " + Arrays.toString(attributes)); } this._hasNumericType = SmileExtUtils.containsNumericType(_attributes); @@ -925,8 +931,8 @@ public final class RegressionTree implements Regression<Vector> { private static void checkArgument(@Nonnull Matrix x, @Nonnull double[] y, int numVars, int maxDepth, int maxLeafs, int minSplits, int minLeafSize) { if (x.numRows() != y.length) { - throw new IllegalArgumentException(String.format( - "The sizes of X and Y don't match: %d != %d", x.numRows(), y.length)); + throw new IllegalArgumentException( + String.format("The sizes of X and Y don't match: %d != %d", x.numRows(), y.length)); } if (numVars <= 0 || numVars > x.numColumns()) { throw new IllegalArgumentException( @@ -944,7 +950,8 @@ public final class RegressionTree implements Regression<Vector> { + minSplits); } if (minLeafSize < 1) { - throw new IllegalArgumentException("Invalid minimum size of leaf nodes: " + minLeafSize); + throw new IllegalArgumentException( + "Invalid minimum size of leaf nodes: " + minLeafSize); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/tools/RandomForestEnsembleUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/tools/RandomForestEnsembleUDAF.java b/core/src/main/java/hivemall/smile/tools/RandomForestEnsembleUDAF.java index 2e87c2c..a63f5c1 100644 --- a/core/src/main/java/hivemall/smile/tools/RandomForestEnsembleUDAF.java +++ b/core/src/main/java/hivemall/smile/tools/RandomForestEnsembleUDAF.java @@ -59,8 +59,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.IntWritable; -@Description( - name = "rf_ensemble", +@Description(name = "rf_ensemble", value = "_FUNC_(int yhat [, array<double> proba [, double model_weight=1.0]])" + " - Returns ensembled prediction results in <int label, double probability, array<double> probabilities>") public final class RandomForestEnsembleUDAF extends AbstractGenericUDAFResolver { @@ -96,8 +95,8 @@ public final class RandomForestEnsembleUDAF extends AbstractGenericUDAFResolver return new RfEvaluatorV2(); } default: - throw new UDFArgumentLengthException("Expected 1~3 arguments but got " - + typeInfo.length); + throw new UDFArgumentLengthException( + "Expected 1~3 arguments but got " + typeInfo.length); } } @@ -144,7 +143,8 @@ public final class RandomForestEnsembleUDAF extends AbstractGenericUDAFResolver fieldNames.add("probability"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); fieldNames.add("probabilities"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); outputOI = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } @@ -313,7 +313,8 @@ public final class RandomForestEnsembleUDAF extends AbstractGenericUDAFResolver if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// from original data this.yhatOI = HiveUtils.asIntegerOI(parameters[0]); this.posterioriOI = HiveUtils.asListOI(parameters[1]); - this.posterioriElemOI = HiveUtils.asDoubleCompatibleOI(posterioriOI.getListElementObjectInspector()); + this.posterioriElemOI = HiveUtils.asDoubleCompatibleOI( + posterioriOI.getListElementObjectInspector()); if (parameters.length == 3) { this.weightOI = HiveUtils.asDoubleCompatibleOI(parameters[2]); } @@ -323,7 +324,8 @@ public final class RandomForestEnsembleUDAF extends AbstractGenericUDAFResolver this.sizeField = soi.getStructFieldRef("size"); this.posterioriField = soi.getStructFieldRef("posteriori"); this.sizeFieldOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector; - this.posterioriFieldOI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + this.posterioriFieldOI = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); } // initialize output @@ -334,7 +336,8 @@ public final class RandomForestEnsembleUDAF extends AbstractGenericUDAFResolver fieldNames.add("size"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); fieldNames.add("posteriori"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); outputOI = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } else {// terminate @@ -345,7 +348,8 @@ public final class RandomForestEnsembleUDAF extends AbstractGenericUDAFResolver fieldNames.add("probability"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); fieldNames.add("probabilities"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); outputOI = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } @@ -372,8 +376,8 @@ public final class RandomForestEnsembleUDAF extends AbstractGenericUDAFResolver Preconditions.checkNotNull(parameters[0]); int yhat = PrimitiveObjectInspectorUtils.getInt(parameters[0], yhatOI); Preconditions.checkNotNull(parameters[1]); - double[] posteriori = HiveUtils.asDoubleArray(parameters[1], posterioriOI, - posterioriElemOI); + double[] posteriori = + HiveUtils.asDoubleArray(parameters[1], posterioriOI, posterioriElemOI); double weight = 1.0d; if (parameters.length == 3) { @@ -481,13 +485,14 @@ public final class RandomForestEnsembleUDAF extends AbstractGenericUDAFResolver this._k = size; this._posteriori = new double[size]; } else { - throw new HiveException("Mismatch in the number of elements: _k=" + _k - + ", size=" + size); + throw new HiveException( + "Mismatch in the number of elements: _k=" + _k + ", size=" + size); } } final double[] posteriori = _posteriori; - final DoubleObjectInspector doubleOI = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + final DoubleObjectInspector doubleOI = + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; for (int i = 0, len = _k; i < len; i++) { Object o2 = posterioriOI.getListElement(posterioriObj, i); posteriori[i] += doubleOI.get(o2); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/tools/TreeExportUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/tools/TreeExportUDF.java b/core/src/main/java/hivemall/smile/tools/TreeExportUDF.java index 1d085e3..86389d1 100644 --- a/core/src/main/java/hivemall/smile/tools/TreeExportUDF.java +++ b/core/src/main/java/hivemall/smile/tools/TreeExportUDF.java @@ -43,8 +43,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.Text; -@Description( - name = "tree_export", +@Description(name = "tree_export", value = "_FUNC_(string model, const string options, optional array<string> featureNames=null, optional array<string> classNames=null)" + " - exports a Decision Tree model as javascript/dot]") @UDFType(deterministic = true, stateful = false) @@ -200,7 +199,8 @@ public final class TreeExportUDF extends UDFWithOptions { break; } case graphviz: { - buf.append("digraph Tree {\n node [shape=box, style=\"filled, rounded\", color=\"black\", fontname=helvetica];\n edge [fontname=helvetica];\n"); + buf.append( + "digraph Tree {\n node [shape=box, style=\"filled, rounded\", color=\"black\", fontname=helvetica];\n edge [fontname=helvetica];\n"); double[] colorBrew = (classNames == null) ? null : SmileExtUtils.getColorBrew(classNames.length); node.exportGraphviz(buf, featureNames, classNames, outputName, colorBrew, @@ -226,7 +226,8 @@ public final class TreeExportUDF extends UDFWithOptions { break; } case graphviz: { - buf.append("digraph Tree {\n node [shape=box, style=\"filled, rounded\", color=\"black\", fontname=helvetica];\n edge [fontname=helvetica];\n"); + buf.append( + "digraph Tree {\n node [shape=box, style=\"filled, rounded\", color=\"black\", fontname=helvetica];\n edge [fontname=helvetica];\n"); node.exportGraphviz(buf, featureNames, outputName, new MutableInt(0), 0); buf.append("}"); break; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java b/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java index 6e28935..9b775bf 100644 --- a/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java +++ b/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java @@ -55,8 +55,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspe import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@Description( - name = "tree_predict", +@Description(name = "tree_predict", value = "_FUNC_(string modelId, string model, array<double|string> features [, const string options | const boolean classification=false])" + " - Returns a prediction result of a random forest" + " in <int value, array<double> a posteriori> for classification and <double> for regression") @@ -134,7 +133,8 @@ public final class TreePredictUDF extends UDFWithOptions { fieldNames.add("value"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); fieldNames.add("posteriori"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } else { return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; @@ -163,8 +163,8 @@ public final class TreePredictUDF extends UDFWithOptions { this.featuresProbe = parseFeatures(arg2, featuresProbe); if (evaluator == null) { - this.evaluator = classification ? new ClassificationEvaluator() - : new RegressionEvaluator(); + this.evaluator = + classification ? new ClassificationEvaluator() : new RegressionEvaluator(); } return evaluator.evaluate(modelId, model, featuresProbe); } @@ -221,8 +221,8 @@ public final class TreePredictUDF extends UDFWithOptions { } if (feature.indexOf(':') != -1) { - throw new UDFArgumentException("Invalid feature format `<index>:<value>`: " - + col); + throw new UDFArgumentException( + "Invalid feature format `<index>:<value>`: " + col); } final int colIndex = Integer.parseInt(feature); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/tools/TreePredictUDFv1.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/tools/TreePredictUDFv1.java b/core/src/main/java/hivemall/smile/tools/TreePredictUDFv1.java index 87c022d..549c984 100644 --- a/core/src/main/java/hivemall/smile/tools/TreePredictUDFv1.java +++ b/core/src/main/java/hivemall/smile/tools/TreePredictUDFv1.java @@ -63,8 +63,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; -@Description( - name = "tree_predict_v1", +@Description(name = "tree_predict_v1", value = "_FUNC_(string modelId, int modelType, string script, array<double> features [, const boolean classification])" + " - Returns a prediction result of a random forest") @UDFType(deterministic = true, stateful = false) @@ -308,8 +307,8 @@ public final class TreePredictUDFv1 extends GenericUDF { ObjectUtils.readObject(serializedObj, length, root); } } catch (IOException ioe) { - throw new HiveException( - "IOException cause while deserializing DecisionTree object", ioe); + throw new HiveException("IOException cause while deserializing DecisionTree object", + ioe); } catch (Exception e) { throw new HiveException("Exception cause while deserializing DecisionTree object", e); @@ -343,8 +342,8 @@ public final class TreePredictUDFv1 extends GenericUDF { ObjectUtils.readObject(serializedObj, length, root); } } catch (IOException ioe) { - throw new HiveException( - "IOException cause while deserializing DecisionTree object", ioe); + throw new HiveException("IOException cause while deserializing DecisionTree object", + ioe); } catch (Exception e) { throw new HiveException("Exception cause while deserializing DecisionTree object", e); @@ -428,8 +427,8 @@ public final class TreePredictUDFv1 extends GenericUDF { return falseChild.predict(x); } } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } } } @@ -531,8 +530,8 @@ public final class TreePredictUDFv1 extends GenericUDF { return falseChild.predict(x); } } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } } } @@ -647,9 +646,9 @@ public final class TreePredictUDFv1 extends GenericUDF { ScriptEngineManager manager = new ScriptEngineManager(); ScriptEngine engine = manager.getEngineByExtension("js"); if (!(engine instanceof Compilable)) { - throw new UDFArgumentException("ScriptEngine was not compilable: " - + engine.getFactory().getEngineName() + " version " - + engine.getFactory().getEngineVersion()); + throw new UDFArgumentException( + "ScriptEngine was not compilable: " + engine.getFactory().getEngineName() + + " version " + engine.getFactory().getEngineVersion()); } this.scriptEngine = engine; this.compilableEngine = (Compilable) engine; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java b/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java index 495c3ef..de7f01e 100644 --- a/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java +++ b/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java @@ -156,7 +156,8 @@ public final class SmileExtUtils { } @Nonnull - public static Attribute[] convertAttributeTypes(@Nonnull final smile.data.Attribute[] original) { + public static Attribute[] convertAttributeTypes( + @Nonnull final smile.data.Attribute[] original) { final int size = original.length; final NumericAttribute immutableNumAttr = new NumericAttribute(); final Attribute[] dst = new Attribute[size]; @@ -288,8 +289,8 @@ public final class SmileExtUtils { public static Matrix shuffle(@Nonnull final Matrix x, @Nonnull final int[] y, long seed) { final int numRows = x.numRows(); if (numRows != y.length) { - throw new IllegalArgumentException("x.length (" + numRows + ") != y.length (" - + y.length + ')'); + throw new IllegalArgumentException( + "x.length (" + numRows + ") != y.length (" + y.length + ')'); } if (seed == -1L) { seed = generateSeed(); @@ -321,8 +322,8 @@ public final class SmileExtUtils { @Nonnull long seed) { final int numRows = x.numRows(); if (numRows != y.length) { - throw new IllegalArgumentException("x.length (" + numRows + ") != y.length (" - + y.length + ')'); + throw new IllegalArgumentException( + "x.length (" + numRows + ") != y.length (" + y.length + ')'); } if (seed == -1L) { seed = generateSeed(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/vm/StackMachine.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/vm/StackMachine.java b/core/src/main/java/hivemall/smile/vm/StackMachine.java index 59a0638..aaf9d86 100644 --- a/core/src/main/java/hivemall/smile/vm/StackMachine.java +++ b/core/src/main/java/hivemall/smile/vm/StackMachine.java @@ -275,8 +275,8 @@ public final class StackMachine { } else { Double v = valuesMap.get(currentOperation.operand); if (v == null) { - throw new VMRuntimeException("value is not bound: " - + currentOperation.operand); + throw new VMRuntimeException( + "value is not bound: " + currentOperation.operand); } push(v); } @@ -284,8 +284,8 @@ public final class StackMachine { break; } default: - throw new VMRuntimeException("Machine code has wrong opcode :" - + currentOperation.op); + throw new VMRuntimeException( + "Machine code has wrong opcode :" + currentOperation.op); } return true; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/statistics/MovingAverageUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/statistics/MovingAverageUDTF.java b/core/src/main/java/hivemall/statistics/MovingAverageUDTF.java index fd24ec0..112c47f 100644 --- a/core/src/main/java/hivemall/statistics/MovingAverageUDTF.java +++ b/core/src/main/java/hivemall/statistics/MovingAverageUDTF.java @@ -62,7 +62,8 @@ public final class MovingAverageUDTF extends GenericUDTF { this.forwardObjs = new Object[] {result}; List<String> fieldNames = Arrays.asList("avg"); - List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/ConvertLabelUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/ConvertLabelUDF.java b/core/src/main/java/hivemall/tools/ConvertLabelUDF.java index 4c8a739..30ab668 100644 --- a/core/src/main/java/hivemall/tools/ConvertLabelUDF.java +++ b/core/src/main/java/hivemall/tools/ConvertLabelUDF.java @@ -24,8 +24,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.IntWritable; -@Description( - name = "convert_label", +@Description(name = "convert_label", value = "_FUNC_(const int|const float) - Convert from -1|1 to 0.0f|1.0f, or from 0.0f|1.0f to -1|1") @UDFType(deterministic = true, stateful = false) public final class ConvertLabelUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/EachTopKUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/EachTopKUDTF.java b/core/src/main/java/hivemall/tools/EachTopKUDTF.java index da33eb6..f01dc56 100644 --- a/core/src/main/java/hivemall/tools/EachTopKUDTF.java +++ b/core/src/main/java/hivemall/tools/EachTopKUDTF.java @@ -42,8 +42,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.io.IntWritable; -@Description( - name = "each_top_k", +@Description(name = "each_top_k", value = "_FUNC_(int K, Object group, double cmpKey, *) - Returns top-K values (or tail-K values when k is less than 0)") public final class EachTopKUDTF extends GenericUDTF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java b/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java index 9d9b150..2567ac7 100644 --- a/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java +++ b/core/src/main/java/hivemall/tools/GenerateSeriesUDTF.java @@ -31,8 +31,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -@Description( - name = "generate_series", +@Description(name = "generate_series", value = "_FUNC_(const int|bigint start, const int|bigint end) - " + "Generate a series of values, from start to end. A similar function to PostgreSQL's `generate_serics`. http://www.postgresql.org/docs/current/static/functions-srf.html", extended = "select generate_series(1,9);\n\n" + "1\n" + "2\n" + "3\n" + "4\n" + "5\n" @@ -62,8 +61,8 @@ public final class GenerateSeriesUDTF extends GenericUDTF { this.start = HiveUtils.getAsConstLong(argOIs[0]); this.end = HiveUtils.getAsConstLong(argOIs[1]); if (start > end) { - throw new UDFArgumentException("start '" + start - + "' must be less than or equals to end '" + end + "'"); + throw new UDFArgumentException( + "start '" + start + "' must be less than or equals to end '" + end + "'"); } return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/TryCastUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/TryCastUDF.java b/core/src/main/java/hivemall/tools/TryCastUDF.java index 69ddc2f..a0f3257 100644 --- a/core/src/main/java/hivemall/tools/TryCastUDF.java +++ b/core/src/main/java/hivemall/tools/TryCastUDF.java @@ -32,8 +32,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -@Description(name = "try_cast", value = "_FUNC_(ANY src, const string typeName)" - + " - Explicitly cast a value as a type. Returns null if cast fails.", +@Description(name = "try_cast", + value = "_FUNC_(ANY src, const string typeName)" + + " - Explicitly cast a value as a type. Returns null if cast fails.", extended = "Usage: select try_cast(array(1.0,2.0,3.0), 'array<string>')\n" + " select try_cast(map('A',10,'B',20,'C',30), 'map<string,double>')") @UDFType(deterministic = true, stateful = false) http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/ArrayAppendUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ArrayAppendUDF.java b/core/src/main/java/hivemall/tools/array/ArrayAppendUDF.java index 8c715c4..25d0f4c 100644 --- a/core/src/main/java/hivemall/tools/array/ArrayAppendUDF.java +++ b/core/src/main/java/hivemall/tools/array/ArrayAppendUDF.java @@ -52,14 +52,16 @@ public final class ArrayAppendUDF extends GenericUDF { @Override public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { this.listInspector = HiveUtils.asListOI(argOIs[0]); - this.listElemInspector = HiveUtils.asPrimitiveObjectInspector(listInspector.getListElementObjectInspector()); + this.listElemInspector = + HiveUtils.asPrimitiveObjectInspector(listInspector.getListElementObjectInspector()); this.primInspector = HiveUtils.asPrimitiveObjectInspector(argOIs[1]); if (listElemInspector.getPrimitiveCategory() != primInspector.getPrimitiveCategory()) { throw new UDFArgumentException( "array_append expects the list type to match the type of the value being appended"); } this.returnWritables = listElemInspector.preferWritable(); - return ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorUtils.getStandardObjectInspector(listElemInspector)); + return ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorUtils.getStandardObjectInspector(listElemInspector)); } @Nullable http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java b/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java index 090a50c..caaf84e 100644 --- a/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java +++ b/core/src/main/java/hivemall/tools/array/ArrayAvgGenericUDAF.java @@ -94,7 +94,8 @@ public final class ArrayAvgGenericUDAF extends AbstractGenericUDAFResolver { // initialize input if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// from original data this.inputListOI = (ListObjectInspector) parameters[0]; - this.inputListElemOI = HiveUtils.asDoubleCompatibleOI(inputListOI.getListElementObjectInspector()); + this.inputListElemOI = + HiveUtils.asDoubleCompatibleOI(inputListOI.getListElementObjectInspector()); } else {// from partial aggregation StructObjectInspector soi = (StructObjectInspector) parameters[0]; this.internalMergeOI = soi; @@ -102,8 +103,10 @@ public final class ArrayAvgGenericUDAF extends AbstractGenericUDAFResolver { this.sumField = soi.getStructFieldRef("sum"); this.countField = soi.getStructFieldRef("count"); this.sizeOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector; - this.sumOI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - this.countOI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + this.sumOI = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + this.countOI = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableLongObjectInspector); } // initialize output @@ -111,7 +114,8 @@ public final class ArrayAvgGenericUDAF extends AbstractGenericUDAFResolver { if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {// terminatePartial outputOI = internalMergeOI(); } else {// terminate - outputOI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableFloatObjectInspector); + outputOI = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector); } return outputOI; } @@ -123,9 +127,11 @@ public final class ArrayAvgGenericUDAF extends AbstractGenericUDAFResolver { fieldNames.add("size"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); fieldNames.add("sum"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); fieldNames.add("count"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableLongObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } @@ -201,8 +207,8 @@ public final class ArrayAvgGenericUDAF extends AbstractGenericUDAFResolver { } @Override - public List<FloatWritable> terminate(@SuppressWarnings("deprecation") AggregationBuffer aggr) - throws HiveException { + public List<FloatWritable> terminate( + @SuppressWarnings("deprecation") AggregationBuffer aggr) throws HiveException { ArrayAvgAggregationBuffer myAggr = (ArrayAvgAggregationBuffer) aggr; final int size = myAggr._size; @@ -255,8 +261,8 @@ public final class ArrayAvgGenericUDAF extends AbstractGenericUDAFResolver { init(size); } if (size != _size) {// a corner case - throw new HiveException("Mismatch in the number of elements at tuple: " - + tuple.toString()); + throw new HiveException( + "Mismatch in the number of elements at tuple: " + tuple.toString()); } final double[] sum = _sum; final long[] count = _count; @@ -273,8 +279,10 @@ public final class ArrayAvgGenericUDAF extends AbstractGenericUDAFResolver { void merge(final int o_size, @Nonnull final Object o_sum, @Nonnull final Object o_count, @Nonnull final StandardListObjectInspector sumOI, @Nonnull final StandardListObjectInspector countOI) throws HiveException { - final WritableDoubleObjectInspector sumElemOI = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; - final WritableLongObjectInspector countElemOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector; + final WritableDoubleObjectInspector sumElemOI = + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + final WritableLongObjectInspector countElemOI = + PrimitiveObjectInspectorFactory.writableLongObjectInspector; if (o_size != _size) { if (_size == -1) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/ArrayConcatUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ArrayConcatUDF.java b/core/src/main/java/hivemall/tools/array/ArrayConcatUDF.java index 223d69a..62e3e36 100644 --- a/core/src/main/java/hivemall/tools/array/ArrayConcatUDF.java +++ b/core/src/main/java/hivemall/tools/array/ArrayConcatUDF.java @@ -65,10 +65,10 @@ public class ArrayConcatUDF extends GenericUDF { break; } default: - throw new UDFArgumentTypeException(0, "Argument " + i - + " of function CONCAT_ARRAY must be " + LIST_TYPE_NAME + "<" - + Category.PRIMITIVE + ">, but " + arguments[0].getTypeName() - + " was found."); + throw new UDFArgumentTypeException(0, + "Argument " + i + " of function CONCAT_ARRAY must be " + LIST_TYPE_NAME + + "<" + Category.PRIMITIVE + ">, but " + arguments[0].getTypeName() + + " was found."); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/ArrayFlattenUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ArrayFlattenUDF.java b/core/src/main/java/hivemall/tools/array/ArrayFlattenUDF.java index b35ad1e..906d594 100644 --- a/core/src/main/java/hivemall/tools/array/ArrayFlattenUDF.java +++ b/core/src/main/java/hivemall/tools/array/ArrayFlattenUDF.java @@ -48,20 +48,21 @@ public final class ArrayFlattenUDF extends GenericUDF { @Override public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 1) { - throw new UDFArgumentException("array_flatten expects exactly one argument: " - + argOIs.length); + throw new UDFArgumentException( + "array_flatten expects exactly one argument: " + argOIs.length); } this.listOI = HiveUtils.asListOI(argOIs[0]); ObjectInspector listElemOI = listOI.getListElementObjectInspector(); if (listElemOI.getCategory() != Category.LIST) { - throw new UDFArgumentException("array_flatten takes array of array for the argument: " - + listOI.toString()); + throw new UDFArgumentException( + "array_flatten takes array of array for the argument: " + listOI.toString()); } this.nextedListOI = HiveUtils.asListOI(listElemOI); this.elemOI = nextedListOI.getListElementObjectInspector(); - return ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorUtils.getStandardObjectInspector(elemOI)); + return ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorUtils.getStandardObjectInspector(elemOI)); } @Override http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/ArrayIntersectUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ArrayIntersectUDF.java b/core/src/main/java/hivemall/tools/array/ArrayIntersectUDF.java index dab67bf..909176a 100644 --- a/core/src/main/java/hivemall/tools/array/ArrayIntersectUDF.java +++ b/core/src/main/java/hivemall/tools/array/ArrayIntersectUDF.java @@ -55,8 +55,8 @@ public final class ArrayIntersectUDF extends GenericUDF { throws UDFArgumentException { final int argLength = argOIs.length; if (argLength < 2) { - throw new UDFArgumentLengthException("Expecting at least two arrays as arguments: " - + argLength); + throw new UDFArgumentLengthException( + "Expecting at least two arrays as arguments: " + argLength); } ListObjectInspector[] argListOIs = new ListObjectInspector[argLength]; @@ -67,9 +67,9 @@ public final class ArrayIntersectUDF extends GenericUDF { ListObjectInspector listOI = HiveUtils.asListOI(argOIs[i]); if (!ObjectInspectorUtils.compareTypes(listOI.getListElementObjectInspector(), arg0ElemOI)) { - throw new UDFArgumentException("Array types does not match: " - + arg0ElemOI.getTypeName() + " != " - + listOI.getListElementObjectInspector().getTypeName()); + throw new UDFArgumentException( + "Array types does not match: " + arg0ElemOI.getTypeName() + " != " + + listOI.getListElementObjectInspector().getTypeName()); } argListOIs[i] = listOI; } @@ -106,7 +106,8 @@ public final class ArrayIntersectUDF extends GenericUDF { if (argI == null) { continue; } - final Set<InspectableObject> newSet = new HashSet<ArrayIntersectUDF.InspectableObject>(); + final Set<InspectableObject> newSet = + new HashSet<ArrayIntersectUDF.InspectableObject>(); final ListObjectInspector argIListOI = argListOIs[i]; final ObjectInspector argIElemOI = argIListOI.getListElementObjectInspector(); for (int j = 0, j_size = argIListOI.getListLength(argI); j < j_size; j++) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/ArraySliceUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ArraySliceUDF.java b/core/src/main/java/hivemall/tools/array/ArraySliceUDF.java index 5ac12ac..2bc98b9 100644 --- a/core/src/main/java/hivemall/tools/array/ArraySliceUDF.java +++ b/core/src/main/java/hivemall/tools/array/ArraySliceUDF.java @@ -39,8 +39,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -@Description( - name = "array_slice", +@Description(name = "array_slice", value = "_FUNC_(array<ANY> values, int offset [, int length]) - Slices the given array by the given offset and length parameters.", extended = "select array_slice(array(1,2,3,4,5,6), 2,4);\n" + "> [3,4]") @UDFType(deterministic = true, stateful = false) @@ -56,8 +55,8 @@ public final class ArraySliceUDF extends GenericUDF { @Override public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 2 && argOIs.length != 3) { - throw new UDFArgumentLengthException("Expected 2 or 3 arguments, but got " - + argOIs.length); + throw new UDFArgumentLengthException( + "Expected 2 or 3 arguments, but got " + argOIs.length); } this.valuesOI = HiveUtils.asListOI(argOIs[0]); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/ArraySumUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ArraySumUDAF.java b/core/src/main/java/hivemall/tools/array/ArraySumUDAF.java index 9d4fc93..8a47957 100644 --- a/core/src/main/java/hivemall/tools/array/ArraySumUDAF.java +++ b/core/src/main/java/hivemall/tools/array/ArraySumUDAF.java @@ -112,8 +112,8 @@ public final class ArraySumUDAF extends UDAF { void update(@Nonnull final List<Double> tuple) throws HiveException { if (tuple.size() != _size) {// a corner case - throw new HiveException("Mismatch in the number of elements at tuple: " - + tuple.toString()); + throw new HiveException( + "Mismatch in the number of elements at tuple: " + tuple.toString()); } final List<Double> sum = _sum; for (int i = 0, len = _size; i < len; i++) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/ArrayUnionUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ArrayUnionUDF.java b/core/src/main/java/hivemall/tools/array/ArrayUnionUDF.java index b215351..921bbfa 100644 --- a/core/src/main/java/hivemall/tools/array/ArrayUnionUDF.java +++ b/core/src/main/java/hivemall/tools/array/ArrayUnionUDF.java @@ -67,16 +67,17 @@ public final class ArrayUnionUDF extends GenericUDF { ListObjectInspector checkOI = HiveUtils.asListOI(argOIs[i]); if (!ObjectInspectorUtils.compareTypes(arg0ElemOI, checkOI.getListElementObjectInspector())) { - throw new UDFArgumentException("Array types does not match: " - + arg0OI.getTypeName() + " != " + checkOI.getTypeName()); + throw new UDFArgumentException("Array types does not match: " + arg0OI.getTypeName() + + " != " + checkOI.getTypeName()); } listOIs[i] = checkOI; } this._listOIs = listOIs; - return ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorUtils.getStandardObjectInspector( - arg0ElemOI, ObjectInspectorCopyOption.WRITABLE)); + return ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorUtils.getStandardObjectInspector(arg0ElemOI, + ObjectInspectorCopyOption.WRITABLE)); } @Override http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/CollectAllUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/CollectAllUDAF.java b/core/src/main/java/hivemall/tools/array/CollectAllUDAF.java index 0ed4020..e8aae7a 100644 --- a/core/src/main/java/hivemall/tools/array/CollectAllUDAF.java +++ b/core/src/main/java/hivemall/tools/array/CollectAllUDAF.java @@ -55,15 +55,18 @@ public class CollectAllUDAF extends AbstractGenericUDAFResolver { super.init(m, parameters); if (m == Mode.PARTIAL1) { inputOI = parameters[0]; - return ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorUtils.getStandardObjectInspector(inputOI)); + return ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorUtils.getStandardObjectInspector(inputOI)); } else { if (!(parameters[0] instanceof StandardListObjectInspector)) { inputOI = ObjectInspectorUtils.getStandardObjectInspector(parameters[0]); - return (StandardListObjectInspector) ObjectInspectorFactory.getStandardListObjectInspector(inputOI); + return (StandardListObjectInspector) ObjectInspectorFactory.getStandardListObjectInspector( + inputOI); } else { internalMergeOI = (StandardListObjectInspector) parameters[0]; inputOI = internalMergeOI.getListElementObjectInspector(); - loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(internalMergeOI); + loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector( + internalMergeOI); return loi; } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/ConditionalEmitUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ConditionalEmitUDTF.java b/core/src/main/java/hivemall/tools/array/ConditionalEmitUDTF.java index c7acde6..a73a06f 100644 --- a/core/src/main/java/hivemall/tools/array/ConditionalEmitUDTF.java +++ b/core/src/main/java/hivemall/tools/array/ConditionalEmitUDTF.java @@ -79,7 +79,8 @@ public final class ConditionalEmitUDTF extends GenericUDTF { this.condElemOI = HiveUtils.asBooleanOI(conditionsOI.getListElementObjectInspector()); this.featuresOI = HiveUtils.asListOI(argOIs[1]); - this.featureElemOI = HiveUtils.asPrimitiveObjectInspector(featuresOI.getListElementObjectInspector()); + this.featureElemOI = + HiveUtils.asPrimitiveObjectInspector(featuresOI.getListElementObjectInspector()); List<String> fieldNames = Arrays.asList("feature"); List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(featureElemOI); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/SelectKBestUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/SelectKBestUDF.java b/core/src/main/java/hivemall/tools/array/SelectKBestUDF.java index 527060c..ff37217 100644 --- a/core/src/main/java/hivemall/tools/array/SelectKBestUDF.java +++ b/core/src/main/java/hivemall/tools/array/SelectKBestUDF.java @@ -82,7 +82,8 @@ public final class SelectKBestUDF extends GenericUDF { this.featuresOI = HiveUtils.asListOI(OIs[0]); this.featureOI = HiveUtils.asDoubleCompatibleOI(featuresOI.getListElementObjectInspector()); this.importanceListOI = HiveUtils.asListOI(OIs[1]); - this.importanceElemOI = HiveUtils.asDoubleCompatibleOI(importanceListOI.getListElementObjectInspector()); + this.importanceElemOI = + HiveUtils.asDoubleCompatibleOI(importanceListOI.getListElementObjectInspector()); this._k = HiveUtils.getConstInt(OIs[2]); Preconditions.checkArgument(_k >= 1, UDFArgumentException.class); @@ -92,14 +93,15 @@ public final class SelectKBestUDF extends GenericUDF { } this._result = result; - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); } @Override public List<DoubleWritable> evaluate(DeferredObject[] dObj) throws HiveException { final double[] features = HiveUtils.asDoubleArray(dObj[0].get(), featuresOI, featureOI); - final double[] importanceList = HiveUtils.asDoubleArray(dObj[1].get(), importanceListOI, - importanceElemOI); + final double[] importanceList = + HiveUtils.asDoubleArray(dObj[1].get(), importanceListOI, importanceElemOI); Preconditions.checkNotNull(features, UDFArgumentException.class); Preconditions.checkNotNull(importanceList, UDFArgumentException.class); @@ -109,7 +111,8 @@ public final class SelectKBestUDF extends GenericUDF { int[] topKIndices = _topKIndices; if (topKIndices == null) { - final List<Map.Entry<Integer, Double>> list = new ArrayList<Map.Entry<Integer, Double>>(); + final List<Map.Entry<Integer, Double>> list = + new ArrayList<Map.Entry<Integer, Double>>(); for (int i = 0; i < importanceList.length; i++) { list.add(new AbstractMap.SimpleEntry<Integer, Double>(i, importanceList[i])); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/SortAndUniqArrayUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/SortAndUniqArrayUDF.java b/core/src/main/java/hivemall/tools/array/SortAndUniqArrayUDF.java index 1c6162c..e9c1cc5 100644 --- a/core/src/main/java/hivemall/tools/array/SortAndUniqArrayUDF.java +++ b/core/src/main/java/hivemall/tools/array/SortAndUniqArrayUDF.java @@ -28,8 +28,9 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.IntWritable; -@Description(name = "sort_and_uniq_array", value = "_FUNC_(array<int>) - Takes array<int> and " - + "returns a sorted array with duplicate elements eliminated", +@Description(name = "sort_and_uniq_array", + value = "_FUNC_(array<int>) - Takes array<int> and " + + "returns a sorted array with duplicate elements eliminated", extended = "select sort_and_uniq_array(array(3,1,1,-2,10));\n" + "> [-2,1,3,10]") @UDFType(deterministic = true, stateful = false) public class SortAndUniqArrayUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/SubarrayEndWithUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/SubarrayEndWithUDF.java b/core/src/main/java/hivemall/tools/array/SubarrayEndWithUDF.java index 0f845ac..9018117 100644 --- a/core/src/main/java/hivemall/tools/array/SubarrayEndWithUDF.java +++ b/core/src/main/java/hivemall/tools/array/SubarrayEndWithUDF.java @@ -26,8 +26,9 @@ import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@Description(name = "subarray_endwith", value = "_FUNC_(array<int|text> original, int|text key)" - + " - Returns an array that ends with the specified key", +@Description(name = "subarray_endwith", + value = "_FUNC_(array<int|text> original, int|text key)" + + " - Returns an array that ends with the specified key", extended = "select subarray_endwith(array(1,2,3,4), 3);\n" + "> [1,2,3]") @UDFType(deterministic = true, stateful = false) public class SubarrayEndWithUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/array/SubarrayStartWithUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/SubarrayStartWithUDF.java b/core/src/main/java/hivemall/tools/array/SubarrayStartWithUDF.java index 1903de3..ae0d4fb 100644 --- a/core/src/main/java/hivemall/tools/array/SubarrayStartWithUDF.java +++ b/core/src/main/java/hivemall/tools/array/SubarrayStartWithUDF.java @@ -26,8 +26,9 @@ import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@Description(name = "subarray_startwith", value = "_FUNC_(array<int|text> original, int|text key)" - + " - Returns an array that starts with the specified key", +@Description(name = "subarray_startwith", + value = "_FUNC_(array<int|text> original, int|text key)" + + " - Returns an array that starts with the specified key", extended = "select subarray_startwith(array(1,2,3,4), 2);\n" + "> [2,3,4]") @UDFType(deterministic = true, stateful = false) public class SubarrayStartWithUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/bits/BitsCollectUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/bits/BitsCollectUDAF.java b/core/src/main/java/hivemall/tools/bits/BitsCollectUDAF.java index cc865a0..126cc37 100644 --- a/core/src/main/java/hivemall/tools/bits/BitsCollectUDAF.java +++ b/core/src/main/java/hivemall/tools/bits/BitsCollectUDAF.java @@ -70,15 +70,18 @@ public final class BitsCollectUDAF extends AbstractGenericUDAFResolver { this.inputOI = HiveUtils.asLongCompatibleOI(argOIs[0]); } else {// from partial aggregation this.mergeOI = HiveUtils.asListOI(argOIs[0]); - this.mergeListElemOI = HiveUtils.asPrimitiveObjectInspector(mergeOI.getListElementObjectInspector()); + this.mergeListElemOI = HiveUtils.asPrimitiveObjectInspector( + mergeOI.getListElementObjectInspector()); } // initialize output final ObjectInspector outputOI; if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {// terminatePartial - outputOI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + outputOI = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableLongObjectInspector); } else {// terminate - outputOI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + outputOI = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableLongObjectInspector); } return outputOI; } @@ -112,8 +115,8 @@ public final class BitsCollectUDAF extends AbstractGenericUDAFResolver { if (arg != null) { int index = PrimitiveObjectInspectorUtils.getInt(arg, inputOI); if (index < 0) { - throw new UDFArgumentException("Specified index SHOULD NOT be negative: " - + index); + throw new UDFArgumentException( + "Specified index SHOULD NOT be negative: " + index); } ArrayAggregationBuffer agg = (ArrayAggregationBuffer) aggr; agg.bitset.set(index); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/bits/BitsORUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/bits/BitsORUDF.java b/core/src/main/java/hivemall/tools/bits/BitsORUDF.java index b76f949..d5497b6 100644 --- a/core/src/main/java/hivemall/tools/bits/BitsORUDF.java +++ b/core/src/main/java/hivemall/tools/bits/BitsORUDF.java @@ -53,13 +53,14 @@ public final class BitsORUDF extends GenericUDF { public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { final int argLength = argOIs.length; if (argLength < 2) { - throw new UDFArgumentLengthException("Expecting at least two arrays as arguments: " - + argLength); + throw new UDFArgumentLengthException( + "Expecting at least two arrays as arguments: " + argLength); } ListObjectInspector[] argListOIs = new ListObjectInspector[argLength]; ListObjectInspector arg0ListOI = HiveUtils.asListOI(argOIs[0]); - PrimitiveObjectInspector arg0ElemOI = HiveUtils.asLongCompatibleOI(arg0ListOI.getListElementObjectInspector()); + PrimitiveObjectInspector arg0ElemOI = + HiveUtils.asLongCompatibleOI(arg0ListOI.getListElementObjectInspector()); argListOIs[0] = arg0ListOI; for (int i = 1; i < argLength; i++) { @@ -75,7 +76,8 @@ public final class BitsORUDF extends GenericUDF { this._listElemOI = arg0ElemOI; this._bitset = new BitSet(); - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableLongObjectInspector); } @Override http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/bits/ToBitsUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/bits/ToBitsUDF.java b/core/src/main/java/hivemall/tools/bits/ToBitsUDF.java index f5790d9..599d90a 100644 --- a/core/src/main/java/hivemall/tools/bits/ToBitsUDF.java +++ b/core/src/main/java/hivemall/tools/bits/ToBitsUDF.java @@ -38,8 +38,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.LongWritable; -@Description( - name = "to_bits", +@Description(name = "to_bits", value = "_FUNC_(int[] indexes) - Returns an bitset representation if the given indexes in long[]", extended = "select to_bits(array(1,2,3,128));\n" + "> [14,-9223372036854775808]") @UDFType(deterministic = true, stateful = false) @@ -63,7 +62,8 @@ public final class ToBitsUDF extends GenericUDF { this.listElemOI = HiveUtils.asIntCompatibleOI(listOI.getListElementObjectInspector()); this.bitset = new BitSet(); - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableLongObjectInspector); } @Override http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/bits/UnBitsUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/bits/UnBitsUDF.java b/core/src/main/java/hivemall/tools/bits/UnBitsUDF.java index 7651009..e3f6bae 100644 --- a/core/src/main/java/hivemall/tools/bits/UnBitsUDF.java +++ b/core/src/main/java/hivemall/tools/bits/UnBitsUDF.java @@ -59,7 +59,8 @@ public final class UnBitsUDF extends GenericUDF { this.listOI = HiveUtils.asListOI(argOIs[0]); this.listElemLongOI = HiveUtils.asLongOI(listOI.getListElementObjectInspector()); - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableLongObjectInspector); } @Override http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/compress/DeflateUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/compress/DeflateUDF.java b/core/src/main/java/hivemall/tools/compress/DeflateUDF.java index 28bc370..4c82387 100644 --- a/core/src/main/java/hivemall/tools/compress/DeflateUDF.java +++ b/core/src/main/java/hivemall/tools/compress/DeflateUDF.java @@ -39,10 +39,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspe import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; -@Description( - name = "deflate", - value = "_FUNC_(TEXT data [, const int compressionLevel]) - " - + "Returns a compressed BINARY object by using Deflater. The compression level must be in range [-1,9]", +@Description(name = "deflate", value = "_FUNC_(TEXT data [, const int compressionLevel]) - " + + "Returns a compressed BINARY object by using Deflater. The compression level must be in range [-1,9]", extended = "select base91(deflate('aaaaaaaaaaaaaaaabbbbccc'));\n" + "> AA+=kaIM|WTt!+wbGAA") @UDFType(deterministic = true, stateful = false) public final class DeflateUDF extends GenericUDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/compress/InflateUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/compress/InflateUDF.java b/core/src/main/java/hivemall/tools/compress/InflateUDF.java index 20079ae..e2c4cc2 100644 --- a/core/src/main/java/hivemall/tools/compress/InflateUDF.java +++ b/core/src/main/java/hivemall/tools/compress/InflateUDF.java @@ -79,7 +79,8 @@ public final class InflateUDF extends GenericUDF { try { decompressed = codec.decompress(compressed, 0, len); } catch (IOException e) { - throw new HiveException("Failed to decompressed. Compressed data format is illegal.", e); + throw new HiveException("Failed to decompressed. Compressed data format is illegal.", + e); } compressed = null; if (result == null) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/tools/json/FromJsonUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/json/FromJsonUDF.java b/core/src/main/java/hivemall/tools/json/FromJsonUDF.java index 8ee2a2d..36c29cc 100644 --- a/core/src/main/java/hivemall/tools/json/FromJsonUDF.java +++ b/core/src/main/java/hivemall/tools/json/FromJsonUDF.java @@ -43,8 +43,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.Text; import org.apache.hive.hcatalog.data.HCatRecordObjectInspectorFactory; -@Description( - name = "from_json", +@Description(name = "from_json", value = "_FUNC_(string jsonString, const string returnTypes [, const array<string>|const string columnNames])" + " - Return Hive object.") @UDFType(deterministic = true, stateful = false) @@ -59,8 +58,8 @@ public final class FromJsonUDF extends GenericUDF { @Override public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 2 && argOIs.length != 3) { - throw new UDFArgumentException("from_json takes two or three arguments: " - + argOIs.length); + throw new UDFArgumentException( + "from_json takes two or three arguments: " + argOIs.length); } this.jsonOI = HiveUtils.asStringOI(argOIs[0]); @@ -95,7 +94,8 @@ public final class FromJsonUDF extends GenericUDF { final int numColumns = columnTypes.size(); if (numColumns == 1) { TypeInfo type = columnTypes.get(0); - returnOI = HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type); + returnOI = + HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type); } else { if (columnNames == null) { columnNames = new ArrayList<>(numColumns); @@ -111,7 +111,9 @@ public final class FromJsonUDF extends GenericUDF { final ObjectInspector[] fieldOIs = new ObjectInspector[numColumns]; for (int i = 0; i < fieldOIs.length; i++) { TypeInfo type = columnTypes.get(i); - fieldOIs[i] = HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type); + fieldOIs[i] = + HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo( + type); } returnOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, Arrays.asList(fieldOIs)); @@ -132,7 +134,8 @@ public final class FromJsonUDF extends GenericUDF { result = JsonSerdeUtils.deserialize(jsonString, columnNames, columnTypes); } catch (Throwable e) { throw new HiveException("Failed to deserialize Json: \n" + jsonString.toString() + '\n' - + ExceptionUtils.prettyPrintStackTrace(e), e); + + ExceptionUtils.prettyPrintStackTrace(e), + e); } return result; }
