http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/AUCUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/AUCUDAF.java b/core/src/main/java/hivemall/evaluation/AUCUDAF.java index 9cacaa8..8377dd6 100644 --- a/core/src/main/java/hivemall/evaluation/AUCUDAF.java +++ b/core/src/main/java/hivemall/evaluation/AUCUDAF.java @@ -63,7 +63,8 @@ import org.apache.hadoop.io.LongWritable; public final class AUCUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) + throws SemanticException { if (typeInfo.length != 2 && typeInfo.length != 3) { throw new UDFArgumentTypeException(typeInfo.length - 1, "_FUNC_ takes two or three arguments"); @@ -160,28 +161,33 @@ public final class AUCUDAF extends AbstractGenericUDAFResolver { fieldNames.add("tpPrev"); fieldOIs.add(writableLongObjectInspector); - MapObjectInspector areaPartialMapOI = ObjectInspectorFactory.getStandardMapObjectInspector( - javaDoubleObjectInspector, javaDoubleObjectInspector); + MapObjectInspector areaPartialMapOI = + ObjectInspectorFactory.getStandardMapObjectInspector(javaDoubleObjectInspector, + javaDoubleObjectInspector); fieldNames.add("areaPartialMap"); fieldOIs.add(areaPartialMapOI); - MapObjectInspector fpPartialMapOI = ObjectInspectorFactory.getStandardMapObjectInspector( - javaDoubleObjectInspector, javaLongObjectInspector); + MapObjectInspector fpPartialMapOI = + ObjectInspectorFactory.getStandardMapObjectInspector(javaDoubleObjectInspector, + javaLongObjectInspector); fieldNames.add("fpPartialMap"); fieldOIs.add(fpPartialMapOI); - MapObjectInspector tpPartialMapOI = ObjectInspectorFactory.getStandardMapObjectInspector( - javaDoubleObjectInspector, javaLongObjectInspector); + MapObjectInspector tpPartialMapOI = + ObjectInspectorFactory.getStandardMapObjectInspector(javaDoubleObjectInspector, + javaLongObjectInspector); fieldNames.add("tpPartialMap"); fieldOIs.add(tpPartialMapOI); - MapObjectInspector fpPrevPartialMapOI = ObjectInspectorFactory.getStandardMapObjectInspector( - javaDoubleObjectInspector, javaLongObjectInspector); + MapObjectInspector fpPrevPartialMapOI = + ObjectInspectorFactory.getStandardMapObjectInspector(javaDoubleObjectInspector, + javaLongObjectInspector); fieldNames.add("fpPrevPartialMap"); fieldOIs.add(fpPrevPartialMapOI); - MapObjectInspector tpPrevPartialMapOI = ObjectInspectorFactory.getStandardMapObjectInspector( - javaDoubleObjectInspector, javaLongObjectInspector); + MapObjectInspector tpPrevPartialMapOI = + ObjectInspectorFactory.getStandardMapObjectInspector(javaDoubleObjectInspector, + javaLongObjectInspector); fieldNames.add("tpPrevPartialMap"); fieldOIs.add(tpPrevPartialMapOI); @@ -260,14 +266,14 @@ public final class AUCUDAF extends AbstractGenericUDAFResolver { Object tpObj = internalMergeOI.getStructFieldData(partial, tpField); Object fpPrevObj = internalMergeOI.getStructFieldData(partial, fpPrevField); Object tpPrevObj = internalMergeOI.getStructFieldData(partial, tpPrevField); - Object areaPartialMapObj = internalMergeOI.getStructFieldData(partial, - areaPartialMapField); + Object areaPartialMapObj = + internalMergeOI.getStructFieldData(partial, areaPartialMapField); Object fpPartialMapObj = internalMergeOI.getStructFieldData(partial, fpPartialMapField); Object tpPartialMapObj = internalMergeOI.getStructFieldData(partial, tpPartialMapField); - Object fpPrevPartialMapObj = internalMergeOI.getStructFieldData(partial, - fpPrevPartialMapField); - Object tpPrevPartialMapObj = internalMergeOI.getStructFieldData(partial, - tpPrevPartialMapField); + Object fpPrevPartialMapObj = + internalMergeOI.getStructFieldData(partial, fpPrevPartialMapField); + Object tpPrevPartialMapObj = + internalMergeOI.getStructFieldData(partial, tpPrevPartialMapField); double indexScore = writableDoubleObjectInspector.get(indexScoreObj); double area = writableDoubleObjectInspector.get(areaObj); @@ -276,16 +282,23 @@ public final class AUCUDAF extends AbstractGenericUDAFResolver { long fpPrev = writableLongObjectInspector.get(fpPrevObj); long tpPrev = writableLongObjectInspector.get(tpPrevObj); - StandardMapObjectInspector ddMapOI = ObjectInspectorFactory.getStandardMapObjectInspector( - javaDoubleObjectInspector, javaDoubleObjectInspector); - StandardMapObjectInspector dlMapOI = ObjectInspectorFactory.getStandardMapObjectInspector( - javaDoubleObjectInspector, javaLongObjectInspector); - - Map<Double, Double> areaPartialMap = (Map<Double, Double>) ddMapOI.getMap(HiveUtils.castLazyBinaryObject(areaPartialMapObj)); - Map<Double, Long> fpPartialMap = (Map<Double, Long>) dlMapOI.getMap(HiveUtils.castLazyBinaryObject(fpPartialMapObj)); - Map<Double, Long> tpPartialMap = (Map<Double, Long>) dlMapOI.getMap(HiveUtils.castLazyBinaryObject(tpPartialMapObj)); - Map<Double, Long> fpPrevPartialMap = (Map<Double, Long>) dlMapOI.getMap(HiveUtils.castLazyBinaryObject(fpPrevPartialMapObj)); - Map<Double, Long> tpPrevPartialMap = (Map<Double, Long>) dlMapOI.getMap(HiveUtils.castLazyBinaryObject(tpPrevPartialMapObj)); + StandardMapObjectInspector ddMapOI = + ObjectInspectorFactory.getStandardMapObjectInspector(javaDoubleObjectInspector, + javaDoubleObjectInspector); + StandardMapObjectInspector dlMapOI = + ObjectInspectorFactory.getStandardMapObjectInspector(javaDoubleObjectInspector, + javaLongObjectInspector); + + Map<Double, Double> areaPartialMap = (Map<Double, Double>) ddMapOI.getMap( + HiveUtils.castLazyBinaryObject(areaPartialMapObj)); + Map<Double, Long> fpPartialMap = (Map<Double, Long>) dlMapOI.getMap( + HiveUtils.castLazyBinaryObject(fpPartialMapObj)); + Map<Double, Long> tpPartialMap = (Map<Double, Long>) dlMapOI.getMap( + HiveUtils.castLazyBinaryObject(tpPartialMapObj)); + Map<Double, Long> fpPrevPartialMap = (Map<Double, Long>) dlMapOI.getMap( + HiveUtils.castLazyBinaryObject(fpPrevPartialMapObj)); + Map<Double, Long> tpPrevPartialMap = (Map<Double, Long>) dlMapOI.getMap( + HiveUtils.castLazyBinaryObject(tpPrevPartialMapObj)); ClassificationAUCAggregationBuffer myAggr = (ClassificationAUCAggregationBuffer) agg; myAggr.merge(indexScore, area, fp, tp, fpPrev, tpPrev, areaPartialMap, fpPartialMap, @@ -358,8 +371,8 @@ public final class AUCUDAF extends AbstractGenericUDAFResolver { fpPrevPartialMap.put(indexScore, fpPrev); tpPrevPartialMap.put(indexScore, tpPrev); - SortedMap<Double, Double> areaPartialSortedMap = new TreeMap<Double, Double>( - Collections.reverseOrder()); + SortedMap<Double, Double> areaPartialSortedMap = + new TreeMap<Double, Double>(Collections.reverseOrder()); areaPartialSortedMap.putAll(areaPartialMap); // initialize with leftmost partial result @@ -506,7 +519,8 @@ public final class AUCUDAF extends AbstractGenericUDAFResolver { int recommendSize = recommendList.size(); if (parameters.length == 3) { - recommendSize = PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); + recommendSize = + PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); if (recommendSize < 0) { throw new UDFArgumentException( "The third argument `int recommendSize` must be in greater than or equals to 0: "
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/BinaryResponsesMeasures.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/BinaryResponsesMeasures.java b/core/src/main/java/hivemall/evaluation/BinaryResponsesMeasures.java index c3b4f6a..7b4dd48 100644 --- a/core/src/main/java/hivemall/evaluation/BinaryResponsesMeasures.java +++ b/core/src/main/java/hivemall/evaluation/BinaryResponsesMeasures.java @@ -43,8 +43,8 @@ public final class BinaryResponsesMeasures { * @param recommendSize top-`recommendSize` items in `rankedList` are recommended * @return nDCG */ - public static double nDCG(@Nonnull final List<?> rankedList, - @Nonnull final List<?> groundTruth, @Nonnegative final int recommendSize) { + public static double nDCG(@Nonnull final List<?> rankedList, @Nonnull final List<?> groundTruth, + @Nonnegative final int recommendSize) { Preconditions.checkArgument(recommendSize >= 0); double dcg = 0.d; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/F1ScoreUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/F1ScoreUDAF.java b/core/src/main/java/hivemall/evaluation/F1ScoreUDAF.java index ba1c44e..42a5d67 100644 --- a/core/src/main/java/hivemall/evaluation/F1ScoreUDAF.java +++ b/core/src/main/java/hivemall/evaluation/F1ScoreUDAF.java @@ -122,8 +122,8 @@ public final class F1ScoreUDAF extends UDAF { } private static double precision(final PartialResult partial) { - return (partial.totalPredicted == 0L) ? 0d : partial.tp - / (double) partial.totalPredicted; + return (partial.totalPredicted == 0L) ? 0d + : partial.tp / (double) partial.totalPredicted; } private static double recall(final PartialResult partial) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/FMeasureUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/FMeasureUDAF.java b/core/src/main/java/hivemall/evaluation/FMeasureUDAF.java index 22c0b7f..d3f39a4 100644 --- a/core/src/main/java/hivemall/evaluation/FMeasureUDAF.java +++ b/core/src/main/java/hivemall/evaluation/FMeasureUDAF.java @@ -53,39 +53,41 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; -@Description( - name = "fmeasure", +@Description(name = "fmeasure", value = "_FUNC_(array|int|boolean actual, array|int| boolean predicted [, const string options])" + " - Return a F-measure (f1score is the special with beta=1.0)") public final class FMeasureUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) + throws SemanticException { if (typeInfo.length != 2 && typeInfo.length != 3) { throw new UDFArgumentTypeException(typeInfo.length - 1, "_FUNC_ takes two or three arguments"); } - boolean isArg1ListOrIntOrBoolean = HiveUtils.isListTypeInfo(typeInfo[0]) - || HiveUtils.isIntegerTypeInfo(typeInfo[0]) - || HiveUtils.isBooleanTypeInfo(typeInfo[0]); + boolean isArg1ListOrIntOrBoolean = + HiveUtils.isListTypeInfo(typeInfo[0]) || HiveUtils.isIntegerTypeInfo(typeInfo[0]) + || HiveUtils.isBooleanTypeInfo(typeInfo[0]); if (!isArg1ListOrIntOrBoolean) { throw new UDFArgumentTypeException(0, "The first argument `array/int/boolean actual` is invalid form: " + typeInfo[0]); } - boolean isArg2ListOrIntOrBoolean = HiveUtils.isListTypeInfo(typeInfo[1]) - || HiveUtils.isIntegerTypeInfo(typeInfo[1]) - || HiveUtils.isBooleanTypeInfo(typeInfo[1]); + boolean isArg2ListOrIntOrBoolean = + HiveUtils.isListTypeInfo(typeInfo[1]) || HiveUtils.isIntegerTypeInfo(typeInfo[1]) + || HiveUtils.isBooleanTypeInfo(typeInfo[1]); if (!isArg2ListOrIntOrBoolean) { throw new UDFArgumentTypeException(1, - "The second argument `array/int/boolean predicted` is invalid form: " + typeInfo[1]); + "The second argument `array/int/boolean predicted` is invalid form: " + + typeInfo[1]); } if (!typeInfo[0].equals(typeInfo[1])) { - throw new UDFArgumentTypeException(1, "The first argument `actual`'s type is " - + typeInfo[0] + ", but the second argument `predicted`'s type is not match: " - + typeInfo[1]); + throw new UDFArgumentTypeException(1, + "The first argument `actual`'s type is " + typeInfo[0] + + ", but the second argument `predicted`'s type is not match: " + + typeInfo[1]); } return new Evaluator(); @@ -233,20 +235,21 @@ public final class FMeasureUDAF extends AbstractGenericUDAFResolver { predicted = ((ListObjectInspector) predictedOI).getList(parameters[1]); } else {//binary case if (HiveUtils.isBooleanOI(actualOI)) { // boolean case - actual = Arrays.asList(asIntLabel(parameters[0], - (BooleanObjectInspector) actualOI)); - predicted = Arrays.asList(asIntLabel(parameters[1], - (BooleanObjectInspector) predictedOI)); + actual = Arrays.asList( + asIntLabel(parameters[0], (BooleanObjectInspector) actualOI)); + predicted = Arrays.asList( + asIntLabel(parameters[1], (BooleanObjectInspector) predictedOI)); } else { // int case - final int actualLabel = asIntLabel(parameters[0], (IntObjectInspector) actualOI); + final int actualLabel = + asIntLabel(parameters[0], (IntObjectInspector) actualOI); if (actualLabel == 0 && "binary".equals(average)) { actual = Collections.emptyList(); } else { actual = Arrays.asList(actualLabel); } - final int predictedLabel = asIntLabel(parameters[1], - (IntObjectInspector) predictedOI); + final int predictedLabel = + asIntLabel(parameters[1], (IntObjectInspector) predictedOI); if (predictedLabel == 0 && "binary".equals(average)) { predicted = Collections.emptyList(); } else { @@ -303,15 +306,20 @@ public final class FMeasureUDAF extends AbstractGenericUDAFResolver { Object tpObj = internalMergeOI.getStructFieldData(partial, tpField); Object totalActualObj = internalMergeOI.getStructFieldData(partial, totalActualField); - Object totalPredictedObj = internalMergeOI.getStructFieldData(partial, - totalPredictedField); + Object totalPredictedObj = + internalMergeOI.getStructFieldData(partial, totalPredictedField); Object betaObj = internalMergeOI.getStructFieldData(partial, betaOptionField); Object averageObj = internalMergeOI.getStructFieldData(partial, averageOptionFiled); long tp = PrimitiveObjectInspectorFactory.writableLongObjectInspector.get(tpObj); - long totalActual = PrimitiveObjectInspectorFactory.writableLongObjectInspector.get(totalActualObj); - long totalPredicted = PrimitiveObjectInspectorFactory.writableLongObjectInspector.get(totalPredictedObj); - double beta = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.get(betaObj); - String average = PrimitiveObjectInspectorFactory.writableStringObjectInspector.getPrimitiveJavaObject(averageObj); + long totalActual = + PrimitiveObjectInspectorFactory.writableLongObjectInspector.get(totalActualObj); + long totalPredicted = PrimitiveObjectInspectorFactory.writableLongObjectInspector.get( + totalPredictedObj); + double beta = + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.get(betaObj); + String average = + PrimitiveObjectInspectorFactory.writableStringObjectInspector.getPrimitiveJavaObject( + averageObj); FMeasureAggregationBuffer myAggr = (FMeasureAggregationBuffer) agg; myAggr.merge(tp, totalActual, totalPredicted, beta, average); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/GradedResponsesMeasures.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/GradedResponsesMeasures.java b/core/src/main/java/hivemall/evaluation/GradedResponsesMeasures.java index 5bbbb7e..89cd5d9 100644 --- a/core/src/main/java/hivemall/evaluation/GradedResponsesMeasures.java +++ b/core/src/main/java/hivemall/evaluation/GradedResponsesMeasures.java @@ -36,7 +36,8 @@ public final class GradedResponsesMeasures { private GradedResponsesMeasures() {} public static double nDCG(@Nonnull final List<Double> recommendTopRelScoreList, - @Nonnull final List<Double> truthTopRelScoreList, @Nonnegative final int recommendSize) { + @Nonnull final List<Double> truthTopRelScoreList, + @Nonnegative final int recommendSize) { double dcg = DCG(recommendTopRelScoreList, recommendSize); double idcg = DCG(truthTopRelScoreList, recommendSize); return dcg / idcg; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/HitRateUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/HitRateUDAF.java b/core/src/main/java/hivemall/evaluation/HitRateUDAF.java index b6d74f1..dd3ff4d 100644 --- a/core/src/main/java/hivemall/evaluation/HitRateUDAF.java +++ b/core/src/main/java/hivemall/evaluation/HitRateUDAF.java @@ -65,14 +65,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; -@Description( - name = "hitrate", +@Description(name = "hitrate", value = "_FUNC_(array rankItems, array correctItems [, const int recommendSize = rankItems.size])" + " - Returns HitRate") public final class HitRateUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) + throws SemanticException { if (typeInfo.length != 2 && typeInfo.length != 3) { throw new UDFArgumentTypeException(typeInfo.length - 1, "_FUNC_ takes two or three arguments"); @@ -175,7 +175,8 @@ public final class HitRateUDAF extends AbstractGenericUDAFResolver { int recommendSize = recommendList.size(); if (parameters.length == 3) { - recommendSize = PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); + recommendSize = + PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); if (recommendSize < 0) { throw new UDFArgumentException( "The third argument `int recommendSize` must be in greater than or equals to 0: " @@ -223,8 +224,8 @@ public final class HitRateUDAF extends AbstractGenericUDAFResolver { } - public static final class HitRateAggregationBuffer extends - GenericUDAFEvaluator.AbstractAggregationBuffer { + public static final class HitRateAggregationBuffer + extends GenericUDAFEvaluator.AbstractAggregationBuffer { private double sum; private long count; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/MAPUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/MAPUDAF.java b/core/src/main/java/hivemall/evaluation/MAPUDAF.java index 437fab7..222d60e 100644 --- a/core/src/main/java/hivemall/evaluation/MAPUDAF.java +++ b/core/src/main/java/hivemall/evaluation/MAPUDAF.java @@ -47,14 +47,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; -@Description( - name = "average_precision", +@Description(name = "average_precision", value = "_FUNC_(array rankItems, array correctItems [, const int recommendSize = rankItems.size])" + " - Returns MAP") public final class MAPUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) + throws SemanticException { if (typeInfo.length != 2 && typeInfo.length != 3) { throw new UDFArgumentTypeException(typeInfo.length - 1, "_FUNC_ takes two or three arguments"); @@ -157,7 +157,8 @@ public final class MAPUDAF extends AbstractGenericUDAFResolver { int recommendSize = recommendList.size(); if (parameters.length == 3) { - recommendSize = PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); + recommendSize = + PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); if (recommendSize < 0) { throw new UDFArgumentException( "The third argument `int recommendSize` must be in greater than or equals to 0: " @@ -233,7 +234,8 @@ public final class MAPUDAF extends AbstractGenericUDAFResolver { void iterate(@Nonnull List<?> recommendList, @Nonnull List<?> truthList, @Nonnull int recommendSize) { - sum += BinaryResponsesMeasures.AveragePrecision(recommendList, truthList, recommendSize); + sum += BinaryResponsesMeasures.AveragePrecision(recommendList, truthList, + recommendSize); count++; } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/MRRUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/MRRUDAF.java b/core/src/main/java/hivemall/evaluation/MRRUDAF.java index 1f5a95c..0ec16c5 100644 --- a/core/src/main/java/hivemall/evaluation/MRRUDAF.java +++ b/core/src/main/java/hivemall/evaluation/MRRUDAF.java @@ -47,14 +47,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; -@Description( - name = "mrr", +@Description(name = "mrr", value = "_FUNC_(array rankItems, array correctItems [, const int recommendSize = rankItems.size])" + " - Returns MRR") public final class MRRUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) + throws SemanticException { if (typeInfo.length != 2 && typeInfo.length != 3) { throw new UDFArgumentTypeException(typeInfo.length - 1, "_FUNC_ takes two or three arguments"); @@ -157,7 +157,8 @@ public final class MRRUDAF extends AbstractGenericUDAFResolver { int recommendSize = recommendList.size(); if (parameters.length == 3) { - recommendSize = PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); + recommendSize = + PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); if (recommendSize < 0) { throw new UDFArgumentException( "The third argument `int recommendSize` must be in greater than or equals to 0: " http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/NDCGUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/NDCGUDAF.java b/core/src/main/java/hivemall/evaluation/NDCGUDAF.java index 7510bac..1fe623e 100644 --- a/core/src/main/java/hivemall/evaluation/NDCGUDAF.java +++ b/core/src/main/java/hivemall/evaluation/NDCGUDAF.java @@ -49,14 +49,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; -@Description( - name = "ndcg", +@Description(name = "ndcg", value = "_FUNC_(array rankItems, array correctItems [, const int recommendSize = rankItems.size])" + " - Returns nDCG") public final class NDCGUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) + throws SemanticException { if (typeInfo.length != 2 && typeInfo.length != 3) { throw new UDFArgumentTypeException(typeInfo.length - 1, "_FUNC_ takes two or three arguments"); @@ -160,7 +160,8 @@ public final class NDCGUDAF extends AbstractGenericUDAFResolver { int recommendSize = recommendList.size(); if (parameters.length == 3) { - recommendSize = PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); + recommendSize = + PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); if (recommendSize < 0) { throw new UDFArgumentException( "The third argument `int recommendSize` must be in greater than or equals to 0: " @@ -168,7 +169,8 @@ public final class NDCGUDAF extends AbstractGenericUDAFResolver { } } - boolean isBinary = !HiveUtils.isStructOI(recommendListOI.getListElementObjectInspector()); + boolean isBinary = + !HiveUtils.isStructOI(recommendListOI.getListElementObjectInspector()); double ndcg = 0.0d; if (isBinary) { @@ -176,34 +178,37 @@ public final class NDCGUDAF extends AbstractGenericUDAFResolver { } else { // Create a ordered list of relevance scores for recommended items List<Double> recommendRelScoreList = new ArrayList<Double>(); - StructObjectInspector sOI = (StructObjectInspector) recommendListOI.getListElementObjectInspector(); + StructObjectInspector sOI = + (StructObjectInspector) recommendListOI.getListElementObjectInspector(); List<?> fieldRefList = sOI.getAllStructFieldRefs(); StructField relScoreField = (StructField) fieldRefList.get(0); - PrimitiveObjectInspector relScoreFieldOI = HiveUtils.asDoubleCompatibleOI(relScoreField.getFieldObjectInspector()); + PrimitiveObjectInspector relScoreFieldOI = + HiveUtils.asDoubleCompatibleOI(relScoreField.getFieldObjectInspector()); for (int i = 0, n = recommendList.size(); i < n; i++) { Object structObj = recommendList.get(i); List<Object> fieldList = sOI.getStructFieldsDataAsList(structObj); Object field0 = fieldList.get(0); if (field0 == null) { - throw new UDFArgumentException("Field 0 of a struct field is null: " - + fieldList); + throw new UDFArgumentException( + "Field 0 of a struct field is null: " + fieldList); } - double relScore = PrimitiveObjectInspectorUtils.getDouble(field0, - relScoreFieldOI); + double relScore = + PrimitiveObjectInspectorUtils.getDouble(field0, relScoreFieldOI); recommendRelScoreList.add(relScore); } // Create a ordered list of relevance scores for truth items List<Double> truthRelScoreList = new ArrayList<Double>(); - PrimitiveObjectInspector truthRelScoreOI = HiveUtils.asDoubleCompatibleOI(truthListOI.getListElementObjectInspector()); + PrimitiveObjectInspector truthRelScoreOI = + HiveUtils.asDoubleCompatibleOI(truthListOI.getListElementObjectInspector()); for (int i = 0, n = truthList.size(); i < n; i++) { Object relScoreObj = truthList.get(i); if (relScoreObj == null) { - throw new UDFArgumentException("Found null in the ground truth: " - + truthList); + throw new UDFArgumentException( + "Found null in the ground truth: " + truthList); } - double relScore = PrimitiveObjectInspectorUtils.getDouble(relScoreObj, - truthRelScoreOI); + double relScore = + PrimitiveObjectInspectorUtils.getDouble(relScoreObj, truthRelScoreOI); truthRelScoreList.add(relScore); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java index ef0c81f..4c63c43 100644 --- a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java +++ b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java @@ -47,14 +47,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; -@Description( - name = "precision_at", +@Description(name = "precision_at", value = "_FUNC_(array rankItems, array correctItems [, const int recommendSize = rankItems.size])" + " - Returns Precision") public final class PrecisionUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) + throws SemanticException { if (typeInfo.length != 2 && typeInfo.length != 3) { throw new UDFArgumentTypeException(typeInfo.length - 1, "_FUNC_ takes two or three arguments"); @@ -158,7 +158,8 @@ public final class PrecisionUDAF extends AbstractGenericUDAFResolver { int recommendSize = recommendList.size(); if (parameters.length == 3) { - recommendSize = PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); + recommendSize = + PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); if (recommendSize < 0) { throw new UDFArgumentException( "The third argument `int recommendSize` must be in greater than or equals to 0: " http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/R2UDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/R2UDAF.java b/core/src/main/java/hivemall/evaluation/R2UDAF.java index 4c8231d..45fc764 100755 --- a/core/src/main/java/hivemall/evaluation/R2UDAF.java +++ b/core/src/main/java/hivemall/evaluation/R2UDAF.java @@ -28,8 +28,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @SuppressWarnings("deprecation") -@Description( - name = "r2", +@Description(name = "r2", value = "_FUNC_(double predicted, double actual) - Return R Squared (coefficient of determination)") public final class R2UDAF extends UDAF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/evaluation/RecallUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/RecallUDAF.java b/core/src/main/java/hivemall/evaluation/RecallUDAF.java index cc2e27e..49081b4 100644 --- a/core/src/main/java/hivemall/evaluation/RecallUDAF.java +++ b/core/src/main/java/hivemall/evaluation/RecallUDAF.java @@ -47,14 +47,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; -@Description( - name = "recall_at", +@Description(name = "recall_at", value = "_FUNC_(array rankItems, array correctItems [, const int recommendSize = rankItems.size])" + " - Returns Recall") public final class RecallUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) + throws SemanticException { if (typeInfo.length != 2 && typeInfo.length != 3) { throw new UDFArgumentTypeException(typeInfo.length - 1, "_FUNC_ takes two or three arguments"); @@ -157,7 +157,8 @@ public final class RecallUDAF extends AbstractGenericUDAFResolver { int recommendSize = recommendList.size(); if (parameters.length == 3) { - recommendSize = PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); + recommendSize = + PrimitiveObjectInspectorUtils.getInt(parameters[2], recommendSizeOI); if (recommendSize < 0) { throw new UDFArgumentException( "The third argument `int recommendSize` must be in greater than or equals to 0: " http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/Entry.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/Entry.java b/core/src/main/java/hivemall/fm/Entry.java index 370e727..06f2758 100644 --- a/core/src/main/java/hivemall/fm/Entry.java +++ b/core/src/main/java/hivemall/fm/Entry.java @@ -57,7 +57,8 @@ class Entry { this(buf, factors, Entry.sizeOf(factors), key, offset); } - private Entry(@Nonnull HeapBuffer buf, int factors, int size, int key, @Nonnegative long offset) { + private Entry(@Nonnull HeapBuffer buf, int factors, int size, int key, + @Nonnegative long offset) { this._buf = buf; this._size = size; this._factors = factors; @@ -259,9 +260,9 @@ class Entry { final float newZ = z + gradW - sigma * W; if (!NumberUtils.isFinite(newZ)) { - throw new IllegalStateException("Got newZ " + newZ + " where z=" + z + ", gradW=" - + gradW + ", sigma=" + sigma + ", W=" + W + ", n=" + n + ", gg=" + gg - + ", alpha=" + alpha); + throw new IllegalStateException( + "Got newZ " + newZ + " where z=" + z + ", gradW=" + gradW + ", sigma=" + sigma + + ", W=" + W + ", n=" + n + ", gg=" + gg + ", alpha=" + alpha); } _buf.putFloat(zOffset, newZ); return newZ; @@ -276,8 +277,8 @@ class Entry { final double n = _buf.getFloat(nOffset); final double newN = n + gradW * gradW; if (!NumberUtils.isFinite(newN)) { - throw new IllegalStateException("Got newN " + newN + " where n=" + n + ", gradW=" - + gradW); + throw new IllegalStateException( + "Got newN " + newN + " where n=" + n + ", gradW=" + gradW); } _buf.putFloat(nOffset, NumberUtils.castToFloat(newN)); // cast may throw ArithmeticException return newN; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/FFMStringFeatureMapModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FFMStringFeatureMapModel.java b/core/src/main/java/hivemall/fm/FFMStringFeatureMapModel.java index 282dc4e..9534367 100644 --- a/core/src/main/java/hivemall/fm/FFMStringFeatureMapModel.java +++ b/core/src/main/java/hivemall/fm/FFMStringFeatureMapModel.java @@ -313,9 +313,8 @@ public final class FFMStringFeatureMapModel extends FieldAwareFactorizationMachi @Nonnull String getStatistics() { final NumberFormat fmt = NumberFormat.getIntegerInstance(Locale.US); - return "FFMStringFeatureMapModel [bytesAllocated=" - + NumberUtils.prettySize(_bytesAllocated) + ", bytesUsed=" - + NumberUtils.prettySize(_bytesUsed) + ", numAllocatedW=" + return "FFMStringFeatureMapModel [bytesAllocated=" + NumberUtils.prettySize(_bytesAllocated) + + ", bytesUsed=" + NumberUtils.prettySize(_bytesUsed) + ", numAllocatedW=" + fmt.format(_numAllocatedW) + ", numReusedW=" + fmt.format(_numReusedW) + ", numRemovedW=" + fmt.format(_numRemovedW) + ", numAllocatedV=" + fmt.format(_numAllocatedV) + ", numReusedV=" + fmt.format(_numReusedV) http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/FMArrayModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FMArrayModel.java b/core/src/main/java/hivemall/fm/FMArrayModel.java index fbae404..97807aa 100644 --- a/core/src/main/java/hivemall/fm/FMArrayModel.java +++ b/core/src/main/java/hivemall/fm/FMArrayModel.java @@ -116,8 +116,8 @@ public final class FMArrayModel extends FactorizationMachineModel { public void check(@Nonnull Feature[] x) throws HiveException { for (Feature e : x) { if (e != null && e.getFeatureIndex() < 1) { - throw new HiveException("Index of x should be greater than or equals to 1: " - + Arrays.toString(x)); + throw new HiveException( + "Index of x should be greater than or equals to 1: " + Arrays.toString(x)); } } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/FMHyperParameters.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FMHyperParameters.java b/core/src/main/java/hivemall/fm/FMHyperParameters.java index 69c19a8..0992325 100644 --- a/core/src/main/java/hivemall/fm/FMHyperParameters.java +++ b/core/src/main/java/hivemall/fm/FMHyperParameters.java @@ -107,16 +107,17 @@ class FMHyperParameters { this.l2norm = cl.hasOption("enable_norm"); this.iters = Primitives.parseInt(cl.getOptionValue("iterations"), iters); this.conversionCheck = !cl.hasOption("disable_cvtest"); - this.convergenceRate = Primitives.parseDouble(cl.getOptionValue("cv_rate"), convergenceRate); + this.convergenceRate = + Primitives.parseDouble(cl.getOptionValue("cv_rate"), convergenceRate); this.adaptiveRegularization = cl.hasOption("adaptive_regularization"); - this.validationRatio = Primitives.parseFloat(cl.getOptionValue("validation_ratio"), - validationRatio); + this.validationRatio = + Primitives.parseFloat(cl.getOptionValue("validation_ratio"), validationRatio); if (validationRatio < 0.f || validationRatio >= 1.f) { - throw new UDFArgumentException("validation_ratio should be in range [0, 1): " - + validationRatio); + throw new UDFArgumentException( + "validation_ratio should be in range [0, 1): " + validationRatio); } - this.validationThreshold = Primitives.parseInt(cl.getOptionValue("validation_threshold"), - validationThreshold); + this.validationThreshold = + Primitives.parseInt(cl.getOptionValue("validation_threshold"), validationThreshold); this.parseFeatureAsInt = cl.hasOption("int_feature"); } @@ -193,8 +194,8 @@ class FMHyperParameters { case "ftrl": { this.useFTRL = true; this.useAdaGrad = false; - this.alphaFTRL = Primitives.parseFloat(cl.getOptionValue("alphaFTRL"), - alphaFTRL); + this.alphaFTRL = + Primitives.parseFloat(cl.getOptionValue("alphaFTRL"), alphaFTRL); if (alphaFTRL == 0.f) { throw new UDFArgumentException("-alphaFTRL SHOULD NOT be 0"); } @@ -223,9 +224,8 @@ class FMHyperParameters { public String toString() { return "FFMHyperParameters [globalBias=" + globalBias + ", linearCoeff=" + linearCoeff + ", numFields=" + numFields + ", useAdaGrad=" + useAdaGrad + ", eps=" + eps - + ", useFTRL=" + useFTRL + ", alphaFTRL=" + alphaFTRL + ", betaFTRL=" - + betaFTRL + ", lambda1=" + lambda1 + ", lambda2=" + lambda2 + "], " - + super.toString(); + + ", useFTRL=" + useFTRL + ", alphaFTRL=" + alphaFTRL + ", betaFTRL=" + betaFTRL + + ", lambda1=" + lambda1 + ", lambda2=" + lambda2 + "], " + super.toString(); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/FMIntFeatureMapModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FMIntFeatureMapModel.java b/core/src/main/java/hivemall/fm/FMIntFeatureMapModel.java index cbb0d70..72d64c0 100644 --- a/core/src/main/java/hivemall/fm/FMIntFeatureMapModel.java +++ b/core/src/main/java/hivemall/fm/FMIntFeatureMapModel.java @@ -136,8 +136,8 @@ public final class FMIntFeatureMapModel extends FactorizationMachineModel { } final int idx = e.getFeatureIndex(); if (idx < 1) { - throw new HiveException("Index of x should be greater than or equals to 1: " - + Arrays.toString(x)); + throw new HiveException( + "Index of x should be greater than or equals to 1: " + Arrays.toString(x)); } if (!_w.containsKey(idx)) { _w.put(idx, 0.f); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/FMPredictGenericUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FMPredictGenericUDAF.java b/core/src/main/java/hivemall/fm/FMPredictGenericUDAF.java index 730cc49..6de298f 100644 --- a/core/src/main/java/hivemall/fm/FMPredictGenericUDAF.java +++ b/core/src/main/java/hivemall/fm/FMPredictGenericUDAF.java @@ -55,8 +55,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObj import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -@Description( - name = "fm_predict", +@Description(name = "fm_predict", value = "_FUNC_(Float Wj, array<float> Vjf, float Xj) - Returns a prediction value in Double") public final class FMPredictGenericUDAF extends AbstractGenericUDAFResolver { @@ -123,8 +122,10 @@ public final class FMPredictGenericUDAF extends AbstractGenericUDAFResolver { this.sumVjXjField = soi.getStructFieldRef("sumVjXj"); this.sumV2X2Field = soi.getStructFieldRef("sumV2X2"); this.retOI = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; - this.sumVjXjOI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - this.sumV2X2OI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + this.sumVjXjOI = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + this.sumV2X2OI = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); } // initialize output @@ -144,9 +145,11 @@ public final class FMPredictGenericUDAF extends AbstractGenericUDAFResolver { fieldNames.add("ret"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); fieldNames.add("sumVjXj"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); fieldNames.add("sumV2X2"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } @@ -310,7 +313,8 @@ public final class FMPredictGenericUDAF extends AbstractGenericUDAFResolver { throw new HiveException("Mismatch in the number of factors"); } - final WritableDoubleObjectInspector doubleOI = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + final WritableDoubleObjectInspector doubleOI = + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; for (int f = 0; f < factors; f++) { Object o1 = sumVjXjOI.getListElement(o_sumVjXj, f); Object o2 = sumV2X2OI.getListElement(o_sumV2X2, f); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/FactorizationMachineModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FactorizationMachineModel.java b/core/src/main/java/hivemall/fm/FactorizationMachineModel.java index eb26276..bb97bef 100644 --- a/core/src/main/java/hivemall/fm/FactorizationMachineModel.java +++ b/core/src/main/java/hivemall/fm/FactorizationMachineModel.java @@ -157,9 +157,9 @@ public abstract class FactorizationMachineModel { assert (!Double.isNaN(ret)); } if (!NumberUtils.isFinite(ret)) { - throw new HiveException("Detected " + ret - + " in predict. We recommend to normalize training examples.\n" - + "Dumping variables ...\n" + varDump(x)); + throw new HiveException( + "Detected " + ret + " in predict. We recommend to normalize training examples.\n" + + "Dumping variables ...\n" + varDump(x)); } return ret; } @@ -222,9 +222,9 @@ public abstract class FactorizationMachineModel { float wi = getW(x); float nextWi = wi - eta * (gradWi + 2.f * _lambdaW * wi); if (!NumberUtils.isFinite(nextWi)) { - throw new IllegalStateException("Got " + nextWi + " for next W[" + x.getFeature() - + "]\n" + "Xi=" + Xi + ", gradWi=" + gradWi + ", wi=" + wi + ", dloss=" + dloss - + ", eta=" + eta); + throw new IllegalStateException( + "Got " + nextWi + " for next W[" + x.getFeature() + "]\n" + "Xi=" + Xi + ", gradWi=" + + gradWi + ", wi=" + wi + ", dloss=" + dloss + ", eta=" + eta); } setW(x, nextWi); } @@ -238,10 +238,10 @@ public abstract class FactorizationMachineModel { float LambdaVf = getLambdaV(f); float nextVif = Vif - eta * (gradV + 2.f * LambdaVf * Vif); if (!NumberUtils.isFinite(nextVif)) { - throw new IllegalStateException("Got " + nextVif + " for next V" + f + '[' - + x.getFeature() + "]\n" + "Xi=" + Xi + ", Vif=" + Vif + ", h=" + h - + ", gradV=" + gradV + ", lambdaVf=" + LambdaVf + ", dloss=" + dloss - + ", sumViX=" + sumViX + ", eta=" + eta); + throw new IllegalStateException( + "Got " + nextVif + " for next V" + f + '[' + x.getFeature() + "]\n" + "Xi=" + Xi + + ", Vif=" + Vif + ", h=" + h + ", gradV=" + gradV + ", lambdaVf=" + + LambdaVf + ", dloss=" + dloss + ", sumViX=" + sumViX + ", eta=" + eta); } setV(x, f, nextVif); } @@ -317,8 +317,8 @@ public abstract class FactorizationMachineModel { ret += Vjf * xj; } if (!NumberUtils.isFinite(ret)) { - throw new IllegalStateException("Got " + ret + " for sumV[ " + f + "]X.\n" + "x = " - + Arrays.toString(x)); + throw new IllegalStateException( + "Got " + ret + " for sumV[ " + f + "]X.\n" + "x = " + Arrays.toString(x)); } return ret; } @@ -350,7 +350,8 @@ public abstract class FactorizationMachineModel { } @Nonnull - public static VInitScheme resolve(@Nullable String opt, @Nonnull VInitScheme defaultScheme) { + public static VInitScheme resolve(@Nullable String opt, + @Nonnull VInitScheme defaultScheme) { if (opt == null) { return defaultScheme; } else if ("gaussian".equalsIgnoreCase(opt)) { @@ -389,8 +390,8 @@ public abstract class FactorizationMachineModel { gaussianFill(ret, _initScheme.rand, _initScheme.initStdDev); break; default: - throw new IllegalStateException("Unsupported V initialization scheme: " - + _initScheme); + throw new IllegalStateException( + "Unsupported V initialization scheme: " + _initScheme); } return ret; } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java b/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java index bbb3ef1..eadd451 100644 --- a/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java +++ b/core/src/main/java/hivemall/fm/FactorizationMachineUDTF.java @@ -65,8 +65,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.Counters.Counter; import org.apache.hadoop.mapred.Reporter; -@Description( - name = "train_fm", +@Description(name = "train_fm", value = "_FUNC_(array<string> x, double y [, const string options]) - Returns a prediction model") public class FactorizationMachineUDTF extends UDTFWithOptions { private static final Log LOG = LogFactory.getLog(FactorizationMachineUDTF.class); @@ -204,10 +203,9 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { @Override public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 2 && argOIs.length != 3) { - throw new UDFArgumentException( - getClass().getSimpleName() - + " takes 2 or 3 arguments: array<string> x, double y [, CONSTANT STRING options]: " - + Arrays.toString(argOIs)); + throw new UDFArgumentException(getClass().getSimpleName() + + " takes 2 or 3 arguments: array<string> x, double y [, CONSTANT STRING options]: " + + Arrays.toString(argOIs)); } this._xOI = HiveUtils.asListOI(argOIs[0]); HiveUtils.validateFeatureOI(_xOI.getListElementObjectInspector()); @@ -244,7 +242,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { fieldNames.add("W_i"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableFloatObjectInspector); fieldNames.add("V_if"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } @@ -311,8 +310,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { file = File.createTempFile("hivemall_fm", ".sgmt"); file.deleteOnExit(); if (!file.canWrite()) { - throw new UDFArgumentException("Cannot write a temporary file: " - + file.getAbsolutePath()); + throw new UDFArgumentException( + "Cannot write a temporary file: " + file.getAbsolutePath()); } LOG.info("Record training examples to a file: " + file.getAbsolutePath()); } catch (IOException ioe) { @@ -543,8 +542,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { final boolean adaregr = _va_rand != null; final Reporter reporter = getReporter(); - final Counter iterCounter = (reporter == null) ? null : reporter.getCounter( - "hivemall.fm.FactorizationMachines$Counter", "iteration"); + final Counter iterCounter = (reporter == null) ? null + : reporter.getCounter("hivemall.fm.FactorizationMachines$Counter", "iteration"); try { if (fileIO.getPosition() == 0L) {// run iterations w/o temporary file @@ -589,8 +588,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { try { fileIO.flush(); } catch (IOException e) { - throw new HiveException("Failed to flush a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to flush a file: " + fileIO.getFile().getAbsolutePath(), e); } if (LOG.isInfoEnabled()) { File tmpFile = fileIO.getFile(); @@ -615,8 +614,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { try { bytesRead = fileIO.read(inputBuf); } catch (IOException e) { - throw new HiveException("Failed to read a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to read a file: " + fileIO.getFile().getAbsolutePath(), e); } if (bytesRead == 0) { // reached file EOF break; @@ -667,8 +666,8 @@ public class FactorizationMachineUDTF extends UDTFWithOptions { try { fileIO.close(true); } catch (IOException e) { - throw new HiveException("Failed to close a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to close a file: " + fileIO.getFile().getAbsolutePath(), e); } this._inputBuf = null; this._fileIO = null; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineModel.java b/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineModel.java index 4c0d83e..c6c0fd0 100644 --- a/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineModel.java +++ b/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineModel.java @@ -109,9 +109,9 @@ public abstract class FieldAwareFactorizationMachineModel extends FactorizationM } } if (!NumberUtils.isFinite(ret)) { - throw new HiveException("Detected " + ret - + " in predict. We recommend to normalize training examples.\n" - + "Dumping variables ...\n" + varDump(x)); + throw new HiveException( + "Detected " + ret + " in predict. We recommend to normalize training examples.\n" + + "Dumping variables ...\n" + varDump(x)); } return ret; } @@ -131,9 +131,9 @@ public abstract class FieldAwareFactorizationMachineModel extends FactorizationM final float eta = eta(theta, t, gradWi); float nextWi = wi - eta * (gradWi + 2.f * _lambdaW * wi); if (!NumberUtils.isFinite(nextWi)) { - throw new IllegalStateException("Got " + nextWi + " for next W[" + x.getFeature() - + "]\n" + "Xi=" + Xi + ", gradWi=" + gradWi + ", wi=" + wi + ", dloss=" + dloss - + ", eta=" + eta + ", t=" + t); + throw new IllegalStateException( + "Got " + nextWi + " for next W[" + x.getFeature() + "]\n" + "Xi=" + Xi + ", gradWi=" + + gradWi + ", wi=" + wi + ", dloss=" + dloss + ", eta=" + eta + ", t=" + t); } if (MathUtils.closeToZero(nextWi, 1E-9f)) { removeEntry(theta); @@ -159,8 +159,8 @@ public abstract class FieldAwareFactorizationMachineModel extends FactorizationM return; } - final float nextWi = (float) ((MathUtils.sign(z) * _lambda1 - z) / ((_beta + Math.sqrt(n)) - / _alpha + _lambda2)); + final float nextWi = (float) ((MathUtils.sign(z) * _lambda1 - z) + / ((_beta + Math.sqrt(n)) / _alpha + _lambda2)); if (!NumberUtils.isFinite(nextWi)) { throw new IllegalStateException("Got " + nextWi + " for next W[" + x.getFeature() + "]\n" + "Xi=" + Xi + ", gradWi=" + gradWi + ", wi=" + theta.getW() @@ -196,10 +196,10 @@ public abstract class FieldAwareFactorizationMachineModel extends FactorizationM final float eta = eta(theta, f, t, gradV); final float nextV = currentV - eta * (gradV + 2.f * lambdaVf * currentV); if (!NumberUtils.isFinite(nextV)) { - throw new IllegalStateException("Got " + nextV + " for next V" + f + '[' - + x.getFeatureIndex() + "]\n" + "Xi=" + Xi + ", Vif=" + currentV + ", h=" + h - + ", gradV=" + gradV + ", lambdaVf=" + lambdaVf + ", dloss=" + dloss - + ", sumViX=" + sumViX + ", t=" + t); + throw new IllegalStateException( + "Got " + nextV + " for next V" + f + '[' + x.getFeatureIndex() + "]\n" + "Xi=" + Xi + + ", Vif=" + currentV + ", h=" + h + ", gradV=" + gradV + ", lambdaVf=" + + lambdaVf + ", dloss=" + dloss + ", sumViX=" + sumViX + ", t=" + t); } if (MathUtils.closeToZero(nextV, 1E-9f)) { theta.setV(f, 0.f); @@ -234,13 +234,13 @@ public abstract class FieldAwareFactorizationMachineModel extends FactorizationM return; } - final float nextV = (float) ((MathUtils.sign(z) * _lambda1 - z) / ((_beta + Math.sqrt(n)) - / _alpha + _lambda2)); + final float nextV = (float) ((MathUtils.sign(z) * _lambda1 - z) + / ((_beta + Math.sqrt(n)) / _alpha + _lambda2)); if (!NumberUtils.isFinite(nextV)) { - throw new IllegalStateException("Got " + nextV + " for next V" + f + '[' - + x.getFeatureIndex() + "]\n" + "Xi=" + Xi + ", Vif=" + theta.getV(f) + ", h=" - + h + ", gradV=" + gradV + ", dloss=" + dloss + ", sumViX=" + sumViX + ", n=" - + n + ", z=" + z); + throw new IllegalStateException( + "Got " + nextV + " for next V" + f + '[' + x.getFeatureIndex() + "]\n" + "Xi=" + Xi + + ", Vif=" + theta.getV(f) + ", h=" + h + ", gradV=" + gradV + ", dloss=" + + dloss + ", sumViX=" + sumViX + ", n=" + n + ", z=" + z); } if (MathUtils.closeToZero(nextV, 1E-9f)) { theta.setV(f, 0.f); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java b/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java index 953e090..610fa3d 100644 --- a/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java +++ b/core/src/main/java/hivemall/fm/FieldAwareFactorizationMachineUDTF.java @@ -56,8 +56,7 @@ import org.apache.hadoop.io.Text; * @link https://www.csie.ntu.edu.tw/~cjlin/libffm/ * @since v0.5-rc.1 */ -@Description( - name = "train_ffm", +@Description(name = "train_ffm", value = "_FUNC_(array<string> x, double y [, const string options]) - Returns a prediction model") public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachineUDTF { private static final Log LOG = LogFactory.getLog(FieldAwareFactorizationMachineUDTF.class); @@ -86,7 +85,8 @@ public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachi Options opts = super.getOptions(); opts.addOption("w0", "global_bias", false, "Whether to include global bias term w0 [default: OFF]"); - opts.addOption("disable_wi", "no_coeff", false, "Not to include linear term [default: OFF]"); + opts.addOption("disable_wi", "no_coeff", false, + "Not to include linear term [default: OFF]"); // feature hashing opts.addOption("feature_hashing", true, "The number of bits for feature hashing in range [18,31] [default: -1]. No feature hashing for -1."); @@ -101,10 +101,7 @@ public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachi "Alpha value (learning rate) of Follow-The-Regularized-Reader [default: 0.2]"); opts.addOption("beta", "betaFTRL", true, "Beta value (a learning smoothing parameter) of Follow-The-Regularized-Reader [default: 1.0]"); - opts.addOption( - "l1", - "lambda1", - true, + opts.addOption("l1", "lambda1", true, "L1 regularization value of Follow-The-Regularized-Reader that controls model Sparseness [default: 0.001]"); opts.addOption("l2", "lambda2", true, "L2 regularization value of Follow-The-Regularized-Reader [default: 0.0001]"); @@ -157,7 +154,8 @@ public final class FieldAwareFactorizationMachineUDTF extends FactorizationMachi fieldOIs.add(PrimitiveObjectInspectorFactory.writableFloatObjectInspector); fieldNames.add("Vi"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/AddBiasUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/AddBiasUDF.java b/core/src/main/java/hivemall/ftvec/AddBiasUDF.java index 5870a4f..13f7d06 100644 --- a/core/src/main/java/hivemall/ftvec/AddBiasUDF.java +++ b/core/src/main/java/hivemall/ftvec/AddBiasUDF.java @@ -30,8 +30,7 @@ import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@Description( - name = "add_bias", +@Description(name = "add_bias", value = "_FUNC_(feature_vector in array<string>) - Returns features with a bias in array<string>") @UDFType(deterministic = true, stateful = false) public final class AddBiasUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/AddFeatureIndexUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/AddFeatureIndexUDF.java b/core/src/main/java/hivemall/ftvec/AddFeatureIndexUDF.java index 21b3514..c13b3a9 100644 --- a/core/src/main/java/hivemall/ftvec/AddFeatureIndexUDF.java +++ b/core/src/main/java/hivemall/ftvec/AddFeatureIndexUDF.java @@ -35,8 +35,7 @@ import org.apache.hadoop.io.Text; * > ["1:3.0","2:4.0","3:5.0"] * </pre> */ -@Description( - name = "add_feature_index", +@Description(name = "add_feature_index", value = "_FUNC_(ARRAY[DOUBLE]: dense feature vector) - Returns a feature vector with feature indices") @UDFType(deterministic = true, stateful = false) public final class AddFeatureIndexUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/ExtractWeightUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/ExtractWeightUDF.java b/core/src/main/java/hivemall/ftvec/ExtractWeightUDF.java index 01d4c01..f275b6f 100644 --- a/core/src/main/java/hivemall/ftvec/ExtractWeightUDF.java +++ b/core/src/main/java/hivemall/ftvec/ExtractWeightUDF.java @@ -29,8 +29,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -@Description( - name = "extract_weight", +@Description(name = "extract_weight", value = "_FUNC_(feature_vector in array<string>) - Returns the weights of features in array<string>") @UDFType(deterministic = true, stateful = false) public final class ExtractWeightUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/FeatureUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/FeatureUDF.java b/core/src/main/java/hivemall/ftvec/FeatureUDF.java index b44459e..b2dc5fa 100644 --- a/core/src/main/java/hivemall/ftvec/FeatureUDF.java +++ b/core/src/main/java/hivemall/ftvec/FeatureUDF.java @@ -33,8 +33,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Text; -@Description( - name = "feature", +@Description(name = "feature", value = "_FUNC_(<string|int|long|short|byte> feature, <number> value) - Returns a feature string") @UDFType(deterministic = true, stateful = false) public final class FeatureUDF extends GenericUDF { @@ -77,10 +76,11 @@ public final class FeatureUDF extends GenericUDF { } } - private static void validateValueOI(@Nonnull ObjectInspector argOI) throws UDFArgumentException { + private static void validateValueOI(@Nonnull ObjectInspector argOI) + throws UDFArgumentException { if (!HiveUtils.isNumberOI(argOI)) { - throw new UDFArgumentException("_FUNC_ expects a number type for `value` but got " - + argOI.getTypeName()); + throw new UDFArgumentException( + "_FUNC_ expects a number type for `value` but got " + argOI.getTypeName()); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/amplify/RandomAmplifierUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/amplify/RandomAmplifierUDTF.java b/core/src/main/java/hivemall/ftvec/amplify/RandomAmplifierUDTF.java index 687d69a..a9d7bc4 100644 --- a/core/src/main/java/hivemall/ftvec/amplify/RandomAmplifierUDTF.java +++ b/core/src/main/java/hivemall/ftvec/amplify/RandomAmplifierUDTF.java @@ -40,7 +40,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @Description(name = "rand_amplify", value = "_FUNC_(const int xtimes [, const string options], *)" + " - amplify the input records x-times in map-side") -public final class RandomAmplifierUDTF extends UDTFWithOptions implements DropoutListener<Object[]> { +public final class RandomAmplifierUDTF extends UDTFWithOptions + implements DropoutListener<Object[]> { private boolean hasOption = false; private long seed = -1L; @@ -66,7 +67,8 @@ public final class RandomAmplifierUDTF extends UDTFWithOptions implements Dropou cl = parseOptions(rawArgs); this.hasOption = true; this.seed = Primitives.parseLong(cl.getOptionValue("seed"), this.seed); - this.numBuffers = Primitives.parseInt(cl.getOptionValue("num_buffers"), this.numBuffers); + this.numBuffers = + Primitives.parseInt(cl.getOptionValue("num_buffers"), this.numBuffers); } return cl; } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/binning/BuildBinsUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/binning/BuildBinsUDAF.java b/core/src/main/java/hivemall/ftvec/binning/BuildBinsUDAF.java index 995414d..b2f4fc5 100644 --- a/core/src/main/java/hivemall/ftvec/binning/BuildBinsUDAF.java +++ b/core/src/main/java/hivemall/ftvec/binning/BuildBinsUDAF.java @@ -145,23 +145,31 @@ public final class BuildBinsUDAF extends AbstractGenericUDAFResolver { autoShrinkField = structOI.getStructFieldRef("autoShrink"); histogramField = structOI.getStructFieldRef("histogram"); quantilesField = structOI.getStructFieldRef("quantiles"); - autoShrinkOI = (WritableBooleanObjectInspector) autoShrinkField.getFieldObjectInspector(); - histogramOI = (StandardListObjectInspector) histogramField.getFieldObjectInspector(); - quantilesOI = (StandardListObjectInspector) quantilesField.getFieldObjectInspector(); - histogramElOI = (WritableDoubleObjectInspector) histogramOI.getListElementObjectInspector(); - quantileOI = (WritableDoubleObjectInspector) quantilesOI.getListElementObjectInspector(); + autoShrinkOI = + (WritableBooleanObjectInspector) autoShrinkField.getFieldObjectInspector(); + histogramOI = + (StandardListObjectInspector) histogramField.getFieldObjectInspector(); + quantilesOI = + (StandardListObjectInspector) quantilesField.getFieldObjectInspector(); + histogramElOI = + (WritableDoubleObjectInspector) histogramOI.getListElementObjectInspector(); + quantileOI = + (WritableDoubleObjectInspector) quantilesOI.getListElementObjectInspector(); } if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { final ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(); fieldOIs.add(PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector( Arrays.asList("autoShrink", "histogram", "quantiles"), fieldOIs); } else { - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); } } @@ -215,7 +223,8 @@ public final class BuildBinsUDAF extends AbstractGenericUDAFResolver { final BuildBinsAggregationBuffer myAgg = (BuildBinsAggregationBuffer) agg; - myAgg.autoShrink = autoShrinkOI.get(structOI.getStructFieldData(other, autoShrinkField)); + myAgg.autoShrink = + autoShrinkOI.get(structOI.getStructFieldData(other, autoShrinkField)); final List<?> histogram = ((LazyBinaryArray) structOI.getStructFieldData(other, histogramField)).getList(); @@ -235,8 +244,9 @@ public final class BuildBinsUDAF extends AbstractGenericUDAFResolver { final Object[] partialResult = new Object[3]; partialResult[0] = new BooleanWritable(myAgg.autoShrink); partialResult[1] = myAgg.histogram.serialize(); - partialResult[2] = (myAgg.quantiles != null) ? WritableUtils.toWritableList(myAgg.quantiles) - : Collections.singletonList(new DoubleWritable(0)); + partialResult[2] = + (myAgg.quantiles != null) ? WritableUtils.toWritableList(myAgg.quantiles) + : Collections.singletonList(new DoubleWritable(0)); return partialResult; } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/binning/FeatureBinningUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/binning/FeatureBinningUDF.java b/core/src/main/java/hivemall/ftvec/binning/FeatureBinningUDF.java index 14966bf..f713937 100644 --- a/core/src/main/java/hivemall/ftvec/binning/FeatureBinningUDF.java +++ b/core/src/main/java/hivemall/ftvec/binning/FeatureBinningUDF.java @@ -39,8 +39,7 @@ import org.apache.hadoop.io.Text; import java.util.*; -@Description( - name = "feature_binning", +@Description(name = "feature_binning", value = "_FUNC_(array<features::string> features, const map<string, array<number>> quantiles_map)" + " / _FUNC_(number weight, const array<number> quantiles)" + " - Returns binned features as an array<features::string> / bin ID as int") @@ -69,7 +68,8 @@ public final class FeatureBinningUDF extends GenericUDF { if (HiveUtils.isListOI(OIs[0]) && HiveUtils.isMapOI(OIs[1])) { // for (array<features::string> features, const map<string, array<number>> quantiles_map) - if (!HiveUtils.isStringOI(((ListObjectInspector) OIs[0]).getListElementObjectInspector())) { + if (!HiveUtils.isStringOI( + ((ListObjectInspector) OIs[0]).getListElementObjectInspector())) { throw new UDFArgumentTypeException(0, "Only array<string> type argument is acceptable but " + OIs[0].getTypeName() + " was passed as `features`"); @@ -80,18 +80,21 @@ public final class FeatureBinningUDF extends GenericUDF { quantilesMapOI = HiveUtils.asMapOI(OIs[1]); if (!HiveUtils.isStringOI(quantilesMapOI.getMapKeyObjectInspector()) || !HiveUtils.isListOI(quantilesMapOI.getMapValueObjectInspector()) - || !HiveUtils.isNumberOI(((ListObjectInspector) quantilesMapOI.getMapValueObjectInspector()).getListElementObjectInspector())) { + || !HiveUtils.isNumberOI( + ((ListObjectInspector) quantilesMapOI.getMapValueObjectInspector()).getListElementObjectInspector())) { throw new UDFArgumentTypeException(1, "Only map<string, array<number>> type argument is acceptable but " + OIs[1].getTypeName() + " was passed as `quantiles_map`"); } keyOI = HiveUtils.asStringOI(quantilesMapOI.getMapKeyObjectInspector()); quantilesOI = HiveUtils.asListOI(quantilesMapOI.getMapValueObjectInspector()); - quantileOI = HiveUtils.asDoubleCompatibleOI(quantilesOI.getListElementObjectInspector()); + quantileOI = + HiveUtils.asDoubleCompatibleOI(quantilesOI.getListElementObjectInspector()); multiple = true; - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableStringObjectInspector); } else if (HiveUtils.isPrimitiveOI(OIs[0]) && HiveUtils.isListOI(OIs[1])) { // for (number weight, const array<number> quantiles) @@ -103,7 +106,8 @@ public final class FeatureBinningUDF extends GenericUDF { "Only array<number> type argument is acceptable but " + OIs[1].getTypeName() + " was passed as `quantiles`"); } - quantileOI = HiveUtils.asDoubleCompatibleOI(quantilesOI.getListElementObjectInspector()); + quantileOI = + HiveUtils.asDoubleCompatibleOI(quantilesOI.getListElementObjectInspector()); multiple = false; @@ -148,7 +152,8 @@ public final class FeatureBinningUDF extends GenericUDF { // binning if (quantilesMap.containsKey(key)) { - val = String.valueOf(findBin(quantilesMap.get(key), Double.parseDouble(val))); + val = String.valueOf( + findBin(quantilesMap.get(key), Double.parseDouble(val))); } result.add(new Text(key + ":" + val)); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java b/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java index e4c4d4d..ee0ca32 100644 --- a/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java +++ b/core/src/main/java/hivemall/ftvec/binning/NumericHistogram.java @@ -32,10 +32,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspe * **THIS CLASS IS IMPORTED FROM HIVE 2.1.0 FOR COMPATIBILITY** * * A generic, re-usable histogram class that supports partial aggregations. The algorithm is a - * heuristic adapted from the following paper: Yael Ben-Haim and Elad Tom-Tov, - * "A streaming parallel decision tree algorithm", J. Machine Learning Research 11 (2010), pp. - * 849--872. Although there are no approximation guarantees, it appears to work well with adequate - * data and a large (e.g., 20-80) number of histogram bins. + * heuristic adapted from the following paper: Yael Ben-Haim and Elad Tom-Tov, "A streaming parallel + * decision tree algorithm", J. Machine Learning Research 11 (2010), pp. 849--872. Although there + * are no approximation guarantees, it appears to work well with adequate data and a large (e.g., + * 20-80) number of histogram bins. */ public final class NumericHistogram { /** @@ -242,8 +242,8 @@ public final class NumericHistogram { double d = bins.get(smallestdiffloc).y + bins.get(smallestdiffloc + 1).y; Coord smallestdiffbin = bins.get(smallestdiffloc); smallestdiffbin.x *= smallestdiffbin.y / d; - smallestdiffbin.x += bins.get(smallestdiffloc + 1).x / d - * bins.get(smallestdiffloc + 1).y; + smallestdiffbin.x += + bins.get(smallestdiffloc + 1).x / d * bins.get(smallestdiffloc + 1).y; smallestdiffbin.y = d; // Shift the remaining bins left one position bins.remove(smallestdiffloc + 1); @@ -273,8 +273,8 @@ public final class NumericHistogram { } csum -= bins.get(b).y; - double r = bins.get(b - 1).x + (q * sum - csum) - * (bins.get(b).x - bins.get(b - 1).x) / (bins.get(b).y); + double r = bins.get(b - 1).x + + (q * sum - csum) * (bins.get(b).x - bins.get(b - 1).x) / (bins.get(b).y); return r; } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/conv/ConvertToDenseModelUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/conv/ConvertToDenseModelUDAF.java b/core/src/main/java/hivemall/ftvec/conv/ConvertToDenseModelUDAF.java index 008dd3a..3ffe015 100644 --- a/core/src/main/java/hivemall/ftvec/conv/ConvertToDenseModelUDAF.java +++ b/core/src/main/java/hivemall/ftvec/conv/ConvertToDenseModelUDAF.java @@ -28,8 +28,7 @@ import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; import org.apache.hadoop.io.FloatWritable; @SuppressWarnings("deprecation") -@Description( - name = "conv2dense", +@Description(name = "conv2dense", value = "_FUNC_(int feature, float weight, int nDims) - Return a dense model in array<float>") public class ConvertToDenseModelUDAF extends UDAF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/conv/QuantifyColumnsUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/conv/QuantifyColumnsUDTF.java b/core/src/main/java/hivemall/ftvec/conv/QuantifyColumnsUDTF.java index dcca752..ca706f6 100644 --- a/core/src/main/java/hivemall/ftvec/conv/QuantifyColumnsUDTF.java +++ b/core/src/main/java/hivemall/ftvec/conv/QuantifyColumnsUDTF.java @@ -48,8 +48,8 @@ public final class QuantifyColumnsUDTF extends GenericUDTF { public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { int size = argOIs.length; if (size < 2) { - throw new UDFArgumentException("quantified_features takes at least two arguments: " - + size); + throw new UDFArgumentException( + "quantified_features takes at least two arguments: " + size); } this.boolOI = HiveUtils.asBooleanOI(argOIs[0]); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/hashing/ArrayHashValuesUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/hashing/ArrayHashValuesUDF.java b/core/src/main/java/hivemall/ftvec/hashing/ArrayHashValuesUDF.java index b8c0c13..86f0583 100644 --- a/core/src/main/java/hivemall/ftvec/hashing/ArrayHashValuesUDF.java +++ b/core/src/main/java/hivemall/ftvec/hashing/ArrayHashValuesUDF.java @@ -30,8 +30,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.IntWritable; -@Description( - name = "array_hash_values", +@Description(name = "array_hash_values", value = "_FUNC_(array<string> values, [string prefix [, int numFeatures], boolean useIndexAsPrefix])" + " returns hash values in array<int>") @UDFType(deterministic = true, stateful = false) @@ -45,7 +44,8 @@ public final class ArrayHashValuesUDF extends UDF { return evaluate(values, prefix, MurmurHash3.DEFAULT_NUM_FEATURES); } - public List<IntWritable> evaluate(List<String> values, String prefix, boolean useIndexAsPrefix) { + public List<IntWritable> evaluate(List<String> values, String prefix, + boolean useIndexAsPrefix) { return evaluate(values, prefix, MurmurHash3.DEFAULT_NUM_FEATURES, useIndexAsPrefix); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/ftvec/hashing/FeatureHashingUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/hashing/FeatureHashingUDF.java b/core/src/main/java/hivemall/ftvec/hashing/FeatureHashingUDF.java index ce6565c..b2d5dac 100644 --- a/core/src/main/java/hivemall/ftvec/hashing/FeatureHashingUDF.java +++ b/core/src/main/java/hivemall/ftvec/hashing/FeatureHashingUDF.java @@ -98,7 +98,8 @@ public final class FeatureHashingUDF extends UDFWithOptions { if (_listOI == null) { return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } else { - return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector); + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableStringObjectInspector); } }
