http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/math/random/RandomNumberGeneratorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/random/RandomNumberGeneratorFactory.java b/core/src/main/java/hivemall/math/random/RandomNumberGeneratorFactory.java index 8843f7e..2045d33 100644 --- a/core/src/main/java/hivemall/math/random/RandomNumberGeneratorFactory.java +++ b/core/src/main/java/hivemall/math/random/RandomNumberGeneratorFactory.java @@ -80,15 +80,15 @@ public final class RandomNumberGeneratorFactory { rng = new SmileRandom(seed); break; case smileMT: - rng = new SmileRandom(new smile.math.random.MersenneTwister( - Primitives.hashCode(seed))); + rng = new SmileRandom( + new smile.math.random.MersenneTwister(Primitives.hashCode(seed))); break; case smileMT64: rng = new SmileRandom(new smile.math.random.MersenneTwister64(seed)); break; case commonsMath3MT: - rng = new CommonsMathRandom(new org.apache.commons.math3.random.MersenneTwister( - seed)); + rng = new CommonsMathRandom( + new org.apache.commons.math3.random.MersenneTwister(seed)); break; default: throw new IllegalStateException("Unexpected type: " + type);
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mf/BPRMFPredictionUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mf/BPRMFPredictionUDF.java b/core/src/main/java/hivemall/mf/BPRMFPredictionUDF.java index e555d7b..6b72622 100644 --- a/core/src/main/java/hivemall/mf/BPRMFPredictionUDF.java +++ b/core/src/main/java/hivemall/mf/BPRMFPredictionUDF.java @@ -26,8 +26,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.io.FloatWritable; -@Description( - name = "bprmf_predict", +@Description(name = "bprmf_predict", value = "_FUNC_(List<Float> Pu, List<Float> Qi[, double Bi]) - Returns the prediction value") @UDFType(deterministic = true, stateful = false) public final class BPRMFPredictionUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mf/BPRMatrixFactorizationUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mf/BPRMatrixFactorizationUDTF.java b/core/src/main/java/hivemall/mf/BPRMatrixFactorizationUDTF.java index e4bd781..76d52ab 100644 --- a/core/src/main/java/hivemall/mf/BPRMatrixFactorizationUDTF.java +++ b/core/src/main/java/hivemall/mf/BPRMatrixFactorizationUDTF.java @@ -227,9 +227,8 @@ public final class BPRMatrixFactorizationUDTF extends UDTFWithOptions implements @Override public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 3 && argOIs.length != 4) { - throw new UDFArgumentException( - getClass().getSimpleName() - + " takes 3 or 4 arguments: INT user, INT posItem, INT negItem [, CONSTANT STRING options]"); + throw new UDFArgumentException(getClass().getSimpleName() + + " takes 3 or 4 arguments: INT user, INT posItem, INT negItem [, CONSTANT STRING options]"); } this.userOI = HiveUtils.asIntCompatibleOI(argOIs[0]); this.posItemOI = HiveUtils.asIntCompatibleOI(argOIs[1]); @@ -251,8 +250,8 @@ public final class BPRMatrixFactorizationUDTF extends UDTFWithOptions implements file = File.createTempFile("hivemall_bprmf", ".sgmt"); file.deleteOnExit(); if (!file.canWrite()) { - throw new UDFArgumentException("Cannot write a temporary file: " - + file.getAbsolutePath()); + throw new UDFArgumentException( + "Cannot write a temporary file: " + file.getAbsolutePath()); } } catch (IOException ioe) { throw new UDFArgumentException(ioe); @@ -268,9 +267,11 @@ public final class BPRMatrixFactorizationUDTF extends UDTFWithOptions implements fieldNames.add("idx"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); fieldNames.add("Pu"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); fieldNames.add("Qi"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); if (useBiasClause) { fieldNames.add("Bi"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableFloatObjectInspector); @@ -342,8 +343,8 @@ public final class BPRMatrixFactorizationUDTF extends UDTFWithOptions implements ret += userProbe[k] * itemProbe[k]; } if (!NumberUtils.isFinite(ret)) { - throw new IllegalStateException("Detected " + ret + " in predict where user=" + user - + " and item=" + item); + throw new IllegalStateException( + "Detected " + ret + " in predict where user=" + user + " and item=" + item); } return ret; } @@ -461,7 +462,8 @@ public final class BPRMatrixFactorizationUDTF extends UDTFWithOptions implements } } - private final void runIterativeTraining(@Nonnegative final int iterations) throws HiveException { + private final void runIterativeTraining(@Nonnegative final int iterations) + throws HiveException { final ByteBuffer inputBuf = this.inputBuf; final NioFixedSegment fileIO = this.fileIO; assert (inputBuf != null); @@ -469,8 +471,8 @@ public final class BPRMatrixFactorizationUDTF extends UDTFWithOptions implements final long numTrainingExamples = count; final Reporter reporter = getReporter(); - final Counter iterCounter = (reporter == null) ? null : reporter.getCounter( - "hivemall.mf.BPRMatrixFactorization$Counter", "iteration"); + final Counter iterCounter = (reporter == null) ? null + : reporter.getCounter("hivemall.mf.BPRMatrixFactorization$Counter", "iteration"); try { if (lastWritePos == 0) {// run iterations w/o temporary file @@ -517,8 +519,8 @@ public final class BPRMatrixFactorizationUDTF extends UDTFWithOptions implements try { fileIO.flush(); } catch (IOException e) { - throw new HiveException("Failed to flush a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to flush a file: " + fileIO.getFile().getAbsolutePath(), e); } if (LOG.isInfoEnabled()) { File tmpFile = fileIO.getFile(); @@ -543,8 +545,8 @@ public final class BPRMatrixFactorizationUDTF extends UDTFWithOptions implements try { bytesRead = fileIO.read(seekPos, inputBuf); } catch (IOException e) { - throw new HiveException("Failed to read a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to read a file: " + fileIO.getFile().getAbsolutePath(), e); } if (bytesRead == 0) { // reached file EOF break; @@ -586,8 +588,8 @@ public final class BPRMatrixFactorizationUDTF extends UDTFWithOptions implements try { fileIO.close(true); } catch (IOException e) { - throw new HiveException("Failed to close a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to close a file: " + fileIO.getFile().getAbsolutePath(), e); } this.inputBuf = null; this.fileIO = null; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mf/FactorizedModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mf/FactorizedModel.java b/core/src/main/java/hivemall/mf/FactorizedModel.java index dc4dca9..c3aa739 100644 --- a/core/src/main/java/hivemall/mf/FactorizedModel.java +++ b/core/src/main/java/hivemall/mf/FactorizedModel.java @@ -154,8 +154,8 @@ public final class FactorizedModel { gaussianFill(v, randU, initScheme.initStdDev, ratingInitializer); break; default: - throw new IllegalStateException("Unsupported rank initialization scheme: " - + initScheme); + throw new IllegalStateException( + "Unsupported rank initialization scheme: " + initScheme); } users.put(u, v); @@ -183,8 +183,8 @@ public final class FactorizedModel { gaussianFill(v, randI, initScheme.initStdDev, ratingInitializer); break; default: - throw new IllegalStateException("Unsupported rank initialization scheme: " - + initScheme); + throw new IllegalStateException( + "Unsupported rank initialization scheme: " + initScheme); } items.put(i, v); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mf/MFPredictionUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mf/MFPredictionUDF.java b/core/src/main/java/hivemall/mf/MFPredictionUDF.java index f2fd0e2..3d9dfe4 100644 --- a/core/src/main/java/hivemall/mf/MFPredictionUDF.java +++ b/core/src/main/java/hivemall/mf/MFPredictionUDF.java @@ -30,8 +30,7 @@ import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.FloatWritable; -@Description( - name = "mf_predict", +@Description(name = "mf_predict", value = "_FUNC_(List<Float> Pu, List<Float> Qi[, double Bu, double Bi[, double mu]]) - Returns the prediction value") @UDFType(deterministic = true, stateful = false) public final class MFPredictionUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mf/MatrixFactorizationAdaGradUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mf/MatrixFactorizationAdaGradUDTF.java b/core/src/main/java/hivemall/mf/MatrixFactorizationAdaGradUDTF.java index 82919c1..edc95fb 100644 --- a/core/src/main/java/hivemall/mf/MatrixFactorizationAdaGradUDTF.java +++ b/core/src/main/java/hivemall/mf/MatrixFactorizationAdaGradUDTF.java @@ -27,8 +27,7 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -@Description( - name = "train_mf_adagrad", +@Description(name = "train_mf_adagrad", value = "_FUNC_(INT user, INT item, FLOAT rating [, CONSTANT STRING options])" + " - Returns a relation consists of <int idx, array<float> Pu, array<float> Qi [, float Bu, float Bi [, float mu]]>") public final class MatrixFactorizationAdaGradUDTF extends OnlineMatrixFactorizationUDTF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mf/MatrixFactorizationSGDUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mf/MatrixFactorizationSGDUDTF.java b/core/src/main/java/hivemall/mf/MatrixFactorizationSGDUDTF.java index 7edeaa0..6e45ccf 100644 --- a/core/src/main/java/hivemall/mf/MatrixFactorizationSGDUDTF.java +++ b/core/src/main/java/hivemall/mf/MatrixFactorizationSGDUDTF.java @@ -26,8 +26,7 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -@Description( - name = "train_mf_sgd", +@Description(name = "train_mf_sgd", value = "_FUNC_(INT user, INT item, FLOAT rating [, CONSTANT STRING options])" + " - Returns a relation consists of <int idx, array<float> Pu, array<float> Qi [, float Bu, float Bi [, float mu]]>") public final class MatrixFactorizationSGDUDTF extends OnlineMatrixFactorizationUDTF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mf/OnlineMatrixFactorizationUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mf/OnlineMatrixFactorizationUDTF.java b/core/src/main/java/hivemall/mf/OnlineMatrixFactorizationUDTF.java index 2c10189..537706e 100644 --- a/core/src/main/java/hivemall/mf/OnlineMatrixFactorizationUDTF.java +++ b/core/src/main/java/hivemall/mf/OnlineMatrixFactorizationUDTF.java @@ -52,8 +52,8 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.mapred.Counters.Counter; import org.apache.hadoop.mapred.Reporter; -public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions implements - RatingInitializer { +public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions + implements RatingInitializer { private static final Log logger = LogFactory.getLog(OnlineMatrixFactorizationUDTF.class); private static final int RECORD_BYTES = (Integer.SIZE + Integer.SIZE + Double.SIZE) / 8; @@ -147,15 +147,16 @@ public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions impl initStdDev = Primitives.parseDouble(cl.getOptionValue("min_init_stddev"), 0.1d); this.iterations = Primitives.parseInt(cl.getOptionValue("iterations"), 1); if (iterations < 1) { - throw new UDFArgumentException("'-iterations' must be greater than or equal to 1: " - + iterations); + throw new UDFArgumentException( + "'-iterations' must be greater than or equal to 1: " + iterations); } conversionCheck = !cl.hasOption("disable_cvtest"); convergenceRate = Primitives.parseDouble(cl.getOptionValue("cv_rate"), convergenceRate); boolean noBias = cl.hasOption("no_bias"); this.useBiasClause = !noBias; if (noBias && updateMeanRating) { - throw new UDFArgumentException("Cannot set both `update_mean` and `no_bias` option"); + throw new UDFArgumentException( + "Cannot set both `update_mean` and `no_bias` option"); } } this.rankInit = RankInitScheme.resolve(rankInitOpt); @@ -191,8 +192,8 @@ public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions impl file = File.createTempFile("hivemall_mf", ".sgmt"); file.deleteOnExit(); if (!file.canWrite()) { - throw new UDFArgumentException("Cannot write a temporary file: " - + file.getAbsolutePath()); + throw new UDFArgumentException( + "Cannot write a temporary file: " + file.getAbsolutePath()); } } catch (IOException ioe) { throw new UDFArgumentException(ioe); @@ -208,9 +209,11 @@ public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions impl fieldNames.add("idx"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); fieldNames.add("Pu"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); fieldNames.add("Qi"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)); if (useBiasClause) { fieldNames.add("Bu"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableFloatObjectInspector); @@ -439,9 +442,9 @@ public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions impl numForwarded++; } this.model = null; // help GC - logger.info("Forwarded the prediction model of " + numForwarded - + " rows. [totalErrors=" + cvState.getTotalErrors() + ", lastLosses=" - + cvState.getCumulativeLoss() + ", #trainingExamples=" + count + "]"); + logger.info("Forwarded the prediction model of " + numForwarded + " rows. [totalErrors=" + + cvState.getTotalErrors() + ", lastLosses=" + cvState.getCumulativeLoss() + + ", #trainingExamples=" + count + "]"); } } @@ -467,8 +470,8 @@ public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions impl final long numTrainingExamples = count; final Reporter reporter = getReporter(); - final Counter iterCounter = (reporter == null) ? null : reporter.getCounter( - "hivemall.mf.MatrixFactorization$Counter", "iteration"); + final Counter iterCounter = (reporter == null) ? null + : reporter.getCounter("hivemall.mf.MatrixFactorization$Counter", "iteration"); try { if (lastWritePos == 0) {// run iterations w/o temporary file @@ -510,8 +513,8 @@ public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions impl try { fileIO.flush(); } catch (IOException e) { - throw new HiveException("Failed to flush a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to flush a file: " + fileIO.getFile().getAbsolutePath(), e); } if (logger.isInfoEnabled()) { File tmpFile = fileIO.getFile(); @@ -536,8 +539,8 @@ public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions impl try { bytesRead = fileIO.read(seekPos, inputBuf); } catch (IOException e) { - throw new HiveException("Failed to read a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to read a file: " + fileIO.getFile().getAbsolutePath(), e); } if (bytesRead == 0) { // reached file EOF break; @@ -574,8 +577,8 @@ public abstract class OnlineMatrixFactorizationUDTF extends UDTFWithOptions impl try { fileIO.close(true); } catch (IOException e) { - throw new HiveException("Failed to close a file: " - + fileIO.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to close a file: " + fileIO.getFile().getAbsolutePath(), e); } this.inputBuf = null; this.fileIO = null; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mix/MixMessageDecoder.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mix/MixMessageDecoder.java b/core/src/main/java/hivemall/mix/MixMessageDecoder.java index 0841fca..12381c3 100644 --- a/core/src/main/java/hivemall/mix/MixMessageDecoder.java +++ b/core/src/main/java/hivemall/mix/MixMessageDecoder.java @@ -107,7 +107,8 @@ public final class MixMessageDecoder extends LengthFieldBasedFrameDecoder { } @Override - protected ByteBuf extractFrame(ChannelHandlerContext ctx, ByteBuf buffer, int index, int length) { + protected ByteBuf extractFrame(ChannelHandlerContext ctx, ByteBuf buffer, int index, + int length) { return buffer.slice(index, length); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mix/client/MixClient.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mix/client/MixClient.java b/core/src/main/java/hivemall/mix/client/MixClient.java index 05b6d29..1f60074 100644 --- a/core/src/main/java/hivemall/mix/client/MixClient.java +++ b/core/src/main/java/hivemall/mix/client/MixClient.java @@ -114,8 +114,8 @@ public final class MixClient implements ModelUpdateHandler, Closeable { * @return true if sent request, otherwise false */ @Override - public boolean onUpdate(Object feature, float weight, float covar, short clock, int deltaUpdates) - throws Exception { + public boolean onUpdate(Object feature, float weight, float covar, short clock, + int deltaUpdates) throws Exception { assert (deltaUpdates > 0) : deltaUpdates; if (deltaUpdates < mixThreshold) { return false; // avoid mixing http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/mix/client/MixRequestRouter.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/mix/client/MixRequestRouter.java b/core/src/main/java/hivemall/mix/client/MixRequestRouter.java index 24cfb0c..0939f88 100644 --- a/core/src/main/java/hivemall/mix/client/MixRequestRouter.java +++ b/core/src/main/java/hivemall/mix/client/MixRequestRouter.java @@ -42,8 +42,8 @@ public final class MixRequestRouter { this.numNodes = numEndpoints; NodeInfo[] nodes = new NodeInfo[numEndpoints]; for (int i = 0; i < numEndpoints; i++) { - InetSocketAddress addr = NetUtils.getInetSocketAddress(endpoints[i], - MixEnv.MIXSERV_DEFAULT_PORT); + InetSocketAddress addr = + NetUtils.getInetSocketAddress(endpoints[i], MixEnv.MIXSERV_DEFAULT_PORT); nodes[i] = new NodeInfo(addr); } this.nodes = nodes; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/model/DenseModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/model/DenseModel.java b/core/src/main/java/hivemall/model/DenseModel.java index db72070..65ffb72 100644 --- a/core/src/main/java/hivemall/model/DenseModel.java +++ b/core/src/main/java/hivemall/model/DenseModel.java @@ -170,7 +170,8 @@ public final class DenseModel extends AbstractPredictionModel { } @Override - public <T extends IWeightValue> void set(@Nonnull final Object feature, @Nonnull final T value) { + public <T extends IWeightValue> void set(@Nonnull final Object feature, + @Nonnull final T value) { int i = HiveUtils.parseInt(feature); ensureCapacity(i); float weight = value.get(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/model/FeatureValue.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/model/FeatureValue.java b/core/src/main/java/hivemall/model/FeatureValue.java index 11005e9..d7aecd8 100644 --- a/core/src/main/java/hivemall/model/FeatureValue.java +++ b/core/src/main/java/hivemall/model/FeatureValue.java @@ -151,7 +151,8 @@ public final class FeatureValue { return new FeatureValue(feature, weight); } - public static void parseFeatureAsString(@Nonnull final Text t, @Nonnull final FeatureValue probe) { + public static void parseFeatureAsString(@Nonnull final Text t, + @Nonnull final FeatureValue probe) { String s = t.toString(); parseFeatureAsString(s, probe); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/model/NewDenseModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/model/NewDenseModel.java b/core/src/main/java/hivemall/model/NewDenseModel.java index b5db580..b020db2 100644 --- a/core/src/main/java/hivemall/model/NewDenseModel.java +++ b/core/src/main/java/hivemall/model/NewDenseModel.java @@ -131,7 +131,8 @@ public final class NewDenseModel extends AbstractPredictionModel { } @Override - public <T extends IWeightValue> void set(@Nonnull final Object feature, @Nonnull final T value) { + public <T extends IWeightValue> void set(@Nonnull final Object feature, + @Nonnull final T value) { int i = HiveUtils.parseInt(feature); ensureCapacity(i); float weight = value.get(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/model/NewSpaceEfficientDenseModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/model/NewSpaceEfficientDenseModel.java b/core/src/main/java/hivemall/model/NewSpaceEfficientDenseModel.java index 0a473b4..a2cc6f6 100644 --- a/core/src/main/java/hivemall/model/NewSpaceEfficientDenseModel.java +++ b/core/src/main/java/hivemall/model/NewSpaceEfficientDenseModel.java @@ -150,7 +150,8 @@ public final class NewSpaceEfficientDenseModel extends AbstractPredictionModel { } @Override - public <T extends IWeightValue> void set(@Nonnull final Object feature, @Nonnull final T value) { + public <T extends IWeightValue> void set(@Nonnull final Object feature, + @Nonnull final T value) { int i = HiveUtils.parseInt(feature); ensureCapacity(i); float weight = value.get(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/model/NewSparseModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/model/NewSparseModel.java b/core/src/main/java/hivemall/model/NewSparseModel.java index 99034e0..2149599 100644 --- a/core/src/main/java/hivemall/model/NewSparseModel.java +++ b/core/src/main/java/hivemall/model/NewSparseModel.java @@ -80,7 +80,8 @@ public final class NewSparseModel extends AbstractPredictionModel { } @Override - public <T extends IWeightValue> void set(@Nonnull final Object feature, @Nonnull final T value) { + public <T extends IWeightValue> void set(@Nonnull final Object feature, + @Nonnull final T value) { final IWeightValue wrapperValue = wrapIfRequired(value); if (clockEnabled && value.isTouched()) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/model/SpaceEfficientDenseModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/model/SpaceEfficientDenseModel.java b/core/src/main/java/hivemall/model/SpaceEfficientDenseModel.java index a638939..2924ff8 100644 --- a/core/src/main/java/hivemall/model/SpaceEfficientDenseModel.java +++ b/core/src/main/java/hivemall/model/SpaceEfficientDenseModel.java @@ -190,7 +190,8 @@ public final class SpaceEfficientDenseModel extends AbstractPredictionModel { } @Override - public <T extends IWeightValue> void set(@Nonnull final Object feature, @Nonnull final T value) { + public <T extends IWeightValue> void set(@Nonnull final Object feature, + @Nonnull final T value) { int i = HiveUtils.parseInt(feature); ensureCapacity(i); float weight = value.get(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/model/SparseModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/model/SparseModel.java b/core/src/main/java/hivemall/model/SparseModel.java index 8028cab..a44cbfb 100644 --- a/core/src/main/java/hivemall/model/SparseModel.java +++ b/core/src/main/java/hivemall/model/SparseModel.java @@ -76,7 +76,8 @@ public final class SparseModel extends AbstractPredictionModel { } @Override - public <T extends IWeightValue> void set(@Nonnull final Object feature, @Nonnull final T value) { + public <T extends IWeightValue> void set(@Nonnull final Object feature, + @Nonnull final T value) { final IWeightValue wrapperValue = wrapIfRequired(value); if (clockEnabled && value.isTouched()) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/model/SynchronizedModelWrapper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/model/SynchronizedModelWrapper.java b/core/src/main/java/hivemall/model/SynchronizedModelWrapper.java index 5c2ded1..bf50d75 100644 --- a/core/src/main/java/hivemall/model/SynchronizedModelWrapper.java +++ b/core/src/main/java/hivemall/model/SynchronizedModelWrapper.java @@ -127,7 +127,8 @@ public final class SynchronizedModelWrapper implements PredictionModel { } @Override - public <T extends IWeightValue> void set(@Nonnull final Object feature, @Nonnull final T value) { + public <T extends IWeightValue> void set(@Nonnull final Object feature, + @Nonnull final T value) { try { lock.lock(); model.set(feature, value); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java b/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java index 37c8f7b..b1fe917 100644 --- a/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java +++ b/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java @@ -71,8 +71,8 @@ public final class DenseOptimizerFactory { } if (LOG.isInfoEnabled()) { - LOG.info("Configured " + optimizerImpl.getOptimizerName() + " as the optimizer: " - + options); + LOG.info( + "Configured " + optimizerImpl.getOptimizerName() + " as the optimizer: " + options); } return optimizerImpl; @@ -97,7 +97,8 @@ public final class DenseOptimizerFactory { } @Override - public float update(@Nonnull final Object feature, final float weight, final float gradient) { + public float update(@Nonnull final Object feature, final float weight, + final float gradient) { int i = HiveUtils.parseInt(feature); ensureCapacity(i); weightValueReused.set(weight); @@ -135,7 +136,8 @@ public final class DenseOptimizerFactory { } @Override - public float update(@Nonnull final Object feature, final float weight, final float gradient) { + public float update(@Nonnull final Object feature, final float weight, + final float gradient) { int i = HiveUtils.parseInt(feature); ensureCapacity(i); weightValueReused.set(weight); @@ -174,7 +176,8 @@ public final class DenseOptimizerFactory { } @Override - public float update(@Nonnull final Object feature, final float weight, final float gradient) { + public float update(@Nonnull final Object feature, final float weight, + final float gradient) { int i = HiveUtils.parseInt(feature); ensureCapacity(i); weightValueReused.set(weight); @@ -214,7 +217,8 @@ public final class DenseOptimizerFactory { } @Override - public float update(@Nonnull final Object feature, final float weight, final float gradient) { + public float update(@Nonnull final Object feature, final float weight, + final float gradient) { int i = HiveUtils.parseInt(feature); ensureCapacity(i); weightValueReused.set(weight); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/optimizer/LossFunctions.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/LossFunctions.java b/core/src/main/java/hivemall/optimizer/LossFunctions.java index 54d7fe5..c4705c0 100644 --- a/core/src/main/java/hivemall/optimizer/LossFunctions.java +++ b/core/src/main/java/hivemall/optimizer/LossFunctions.java @@ -29,8 +29,8 @@ import javax.annotation.Nullable; public final class LossFunctions { public enum LossType { - SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, SquaredEpsilonInsensitiveLoss, - HuberLoss, HingeLoss, LogLoss, SquaredHingeLoss, ModifiedHuberLoss + SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, SquaredEpsilonInsensitiveLoss, HuberLoss, + HingeLoss, LogLoss, SquaredHingeLoss, ModifiedHuberLoss } @Nonnull http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/Optimizer.java b/core/src/main/java/hivemall/optimizer/Optimizer.java index 7bf1e84..4b1ef0a 100644 --- a/core/src/main/java/hivemall/optimizer/Optimizer.java +++ b/core/src/main/java/hivemall/optimizer/Optimizer.java @@ -96,7 +96,8 @@ public interface Optimizer { } @Override - public float update(@Nonnull final Object feature, final float weight, final float gradient) { + public float update(@Nonnull final Object feature, final float weight, + final float gradient) { weightValueReused.set(weight); update(weightValueReused, gradient); return weightValueReused.get(); @@ -122,8 +123,8 @@ public interface Optimizer { @Override protected float computeDelta(@Nonnull final IWeightValue weight, final float gradient) { - float new_scaled_sum_sqgrad = weight.getSumOfSquaredGradients() + gradient - * (gradient / scale); + float new_scaled_sum_sqgrad = + weight.getSumOfSquaredGradients() + gradient * (gradient / scale); weight.setSumOfSquaredGradients(new_scaled_sum_sqgrad); return gradient / ((float) Math.sqrt(new_scaled_sum_sqgrad * scale) + eps); } @@ -154,11 +155,10 @@ public interface Optimizer { float old_sum_squared_delta_x = weight.getSumOfSquaredDeltaX(); float new_scaled_sum_sqgrad = (decay * old_scaled_sum_sqgrad) + ((1.f - decay) * gradient * (gradient / scale)); - float delta = (float) Math.sqrt((old_sum_squared_delta_x + eps) - / (new_scaled_sum_sqgrad * scale + eps)) - * gradient; - float new_sum_squared_delta_x = (decay * old_sum_squared_delta_x) - + ((1.f - decay) * delta * delta); + float delta = (float) Math.sqrt( + (old_sum_squared_delta_x + eps) / (new_scaled_sum_sqgrad * scale + eps)) * gradient; + float new_sum_squared_delta_x = + (decay * old_sum_squared_delta_x) + ((1.f - decay) * delta * delta); weight.setSumOfSquaredGradients(new_scaled_sum_sqgrad); weight.setSumOfSquaredDeltaX(new_sum_squared_delta_x); return delta; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/optimizer/OptimizerOptions.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/OptimizerOptions.java b/core/src/main/java/hivemall/optimizer/OptimizerOptions.java index 741c888..2fc838d 100644 --- a/core/src/main/java/hivemall/optimizer/OptimizerOptions.java +++ b/core/src/main/java/hivemall/optimizer/OptimizerOptions.java @@ -61,7 +61,8 @@ public final class OptimizerOptions { opts.addOption("scale", true, "Scaling factor for cumulative weights [100.0]"); } - public static void processOptions(@Nullable CommandLine cl, @Nonnull Map<String, String> options) { + public static void processOptions(@Nullable CommandLine cl, + @Nonnull Map<String, String> options) { if (cl == null) { return; } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/optimizer/Regularization.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/Regularization.java b/core/src/main/java/hivemall/optimizer/Regularization.java index 9650826..674371c 100644 --- a/core/src/main/java/hivemall/optimizer/Regularization.java +++ b/core/src/main/java/hivemall/optimizer/Regularization.java @@ -97,15 +97,15 @@ public abstract class Regularization { this.l1Ratio = Primitives.parseFloat(options.get("l1_ratio"), DEFAULT_L1_RATIO); if (l1Ratio < 0.f || l1Ratio > 1.f) { - throw new IllegalArgumentException("L1 ratio should be in [0.0, 1.0], but got " - + l1Ratio); + throw new IllegalArgumentException( + "L1 ratio should be in [0.0, 1.0], but got " + l1Ratio); } } @Override public float getRegularizer(final float weight) { - return l1Ratio * l1.getRegularizer(weight) + (1.f - l1Ratio) - * l2.getRegularizer(weight); + return l1Ratio * l1.getRegularizer(weight) + + (1.f - l1Ratio) * l2.getRegularizer(weight); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java b/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java index 153215d..7cf61d8 100644 --- a/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java +++ b/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java @@ -70,8 +70,8 @@ public final class SparseOptimizerFactory { } if (LOG.isInfoEnabled()) { - LOG.info("Configured " + optimizerImpl.getOptimizerName() + " as the optimizer: " - + options); + LOG.info( + "Configured " + optimizerImpl.getOptimizerName() + " as the optimizer: " + options); } return optimizerImpl; @@ -89,7 +89,8 @@ public final class SparseOptimizerFactory { } @Override - public float update(@Nonnull final Object feature, final float weight, final float gradient) { + public float update(@Nonnull final Object feature, final float weight, + final float gradient) { IWeightValue auxWeight = auxWeights.get(feature); if (auxWeight == null) { auxWeight = new WeightValue.WeightValueParamsF2(weight, 0.f, 0.f); @@ -114,7 +115,8 @@ public final class SparseOptimizerFactory { } @Override - public float update(@Nonnull final Object feature, final float weight, final float gradient) { + public float update(@Nonnull final Object feature, final float weight, + final float gradient) { IWeightValue auxWeight = auxWeights.get(feature); if (auxWeight == null) { auxWeight = new WeightValue.WeightValueParamsF2(weight, 0.f, 0.f); @@ -139,7 +141,8 @@ public final class SparseOptimizerFactory { } @Override - public float update(@Nonnull final Object feature, final float weight, final float gradient) { + public float update(@Nonnull final Object feature, final float weight, + final float gradient) { IWeightValue auxWeight = auxWeights.get(feature); if (auxWeight == null) { auxWeight = new WeightValue.WeightValueParamsF2(weight, 0.f, 0.f); @@ -165,7 +168,8 @@ public final class SparseOptimizerFactory { } @Override - public float update(@Nonnull final Object feature, final float weight, final float gradient) { + public float update(@Nonnull final Object feature, final float weight, + final float gradient) { IWeightValue auxWeight = auxWeights.get(feature); if (auxWeight == null) { auxWeight = new WeightValue.WeightValueParamsF2(weight, 0.f, 0.f); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/recommend/SlimUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/recommend/SlimUDTF.java b/core/src/main/java/hivemall/recommend/SlimUDTF.java index 251c0d4..a047e6f 100644 --- a/core/src/main/java/hivemall/recommend/SlimUDTF.java +++ b/core/src/main/java/hivemall/recommend/SlimUDTF.java @@ -78,8 +78,7 @@ import org.apache.hadoop.mapred.Reporter; * Xia Ning and George Karypis, SLIM: Sparse Linear Methods for Top-N Recommender Systems, Proc. ICDM, 2011. * </pre> */ -@Description( - name = "train_slim", +@Description(name = "train_slim", value = "_FUNC_( int i, map<int, double> r_i, map<int, map<int, double>> topKRatesOfI, int j, map<int, double> r_j [, constant string options]) " + "- Returns row index, column index and non-zero weight value of prediction model") public class SlimUDTF extends UDTFWithOptions { @@ -173,8 +172,10 @@ public class SlimUDTF extends UDTFWithOptions { this.knnItemsOI = HiveUtils.asMapOI(argOIs[2]); this.knnItemsKeyOI = HiveUtils.asIntCompatibleOI(knnItemsOI.getMapKeyObjectInspector()); this.knnItemsValueOI = HiveUtils.asMapOI(knnItemsOI.getMapValueObjectInspector()); - this.knnItemsValueKeyOI = HiveUtils.asIntCompatibleOI(knnItemsValueOI.getMapKeyObjectInspector()); - this.knnItemsValueValueOI = HiveUtils.asDoubleCompatibleOI(knnItemsValueOI.getMapValueObjectInspector()); + this.knnItemsValueKeyOI = + HiveUtils.asIntCompatibleOI(knnItemsValueOI.getMapKeyObjectInspector()); + this.knnItemsValueValueOI = + HiveUtils.asDoubleCompatibleOI(knnItemsValueOI.getMapValueObjectInspector()); this.itemJOI = HiveUtils.asIntCompatibleOI(argOIs[3]); @@ -244,8 +245,8 @@ public class SlimUDTF extends UDTFWithOptions { numIterations = Primitives.parseInt(cl.getOptionValue("iters"), numIterations); if (numIterations <= 0) { - throw new UDFArgumentException("Argument `int iters` must be greater than 0: " - + numIterations); + throw new UDFArgumentException( + "Argument `int iters` must be greater than 0: " + numIterations); } conversionCheck = !cl.hasOption("disable_cvtest"); @@ -279,8 +280,8 @@ public class SlimUDTF extends UDTFWithOptions { if (itemI != _previousItemId || _ri == null) { // cache Ri and kNNi - this._ri = int2floatMap(itemI, riOI.getMap(args[1]), riKeyOI, riValueOI, _dataMatrix, - _ri); + this._ri = + int2floatMap(itemI, riOI.getMap(args[1]), riKeyOI, riValueOI, _dataMatrix, _ri); this._kNNi = kNNentries(args[2], knnItemsOI, knnItemsKeyOI, knnItemsValueOI, knnItemsValueKeyOI, knnItemsValueValueOI, _kNNi, _nnzKNNi); @@ -292,7 +293,8 @@ public class SlimUDTF extends UDTFWithOptions { } int itemJ = PrimitiveObjectInspectorUtils.getInt(args[3], itemJOI); - Int2FloatMap rj = int2floatMap(itemJ, rjOI.getMap(args[4]), rjKeyOI, rjValueOI, _dataMatrix); + Int2FloatMap rj = + int2floatMap(itemJ, rjOI.getMap(args[4]), rjKeyOI, rjValueOI, _dataMatrix); train(itemI, _ri, _kNNi, itemJ, rj); _observedTrainingExamples++; @@ -311,8 +313,8 @@ public class SlimUDTF extends UDTFWithOptions { file = File.createTempFile("hivemall_slim", ".sgmt"); // to save KNN data file.deleteOnExit(); if (!file.canWrite()) { - throw new UDFArgumentException("Cannot write a temporary file: " - + file.getAbsolutePath()); + throw new UDFArgumentException( + "Cannot write a temporary file: " + file.getAbsolutePath()); } } catch (IOException ioe) { throw new UDFArgumentException(ioe); @@ -450,8 +452,8 @@ public class SlimUDTF extends UDTFWithOptions { return pred; } - private static double getUpdateTerm(final double gradSum, final double rateSum, - final double l1, final double l2) { + private static double getUpdateTerm(final double gradSum, final double rateSum, final double l1, + final double l2) { double update = 0.d; if (Math.abs(gradSum) > l1) { if (gradSum > 0.d) { @@ -493,8 +495,8 @@ public class SlimUDTF extends UDTFWithOptions { assert (dst != null); final Reporter reporter = getReporter(); - final Counters.Counter iterCounter = (reporter == null) ? null : reporter.getCounter( - "hivemall.recommend.slim$Counter", "iteration"); + final Counters.Counter iterCounter = (reporter == null) ? null + : reporter.getCounter("hivemall.recommend.slim$Counter", "iteration"); try { if (dst.getPosition() == 0L) {// run iterations w/o temporary file @@ -517,13 +519,12 @@ public class SlimUDTF extends UDTFWithOptions { break; } } - logger.info("Performed " - + _cvState.getCurrentIteration() - + " iterations of " + logger.info("Performed " + _cvState.getCurrentIteration() + " iterations of " + NumberUtils.formatNumber(_observedTrainingExamples) + " training examples on memory (thus " - + NumberUtils.formatNumber(_observedTrainingExamples - * _cvState.getCurrentIteration()) + " training updates in total) "); + + NumberUtils.formatNumber( + _observedTrainingExamples * _cvState.getCurrentIteration()) + + " training updates in total) "); } else { // read training examples in the temporary file and invoke train for each example // write KNNi in buffer to a temporary file @@ -534,17 +535,16 @@ public class SlimUDTF extends UDTFWithOptions { try { dst.flush(); } catch (IOException e) { - throw new HiveException("Failed to flush a file: " - + dst.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to flush a file: " + dst.getFile().getAbsolutePath(), e); } if (logger.isInfoEnabled()) { File tmpFile = dst.getFile(); - logger.info("Wrote KNN entries of axis items to a temporary file for iterative training: " - + tmpFile.getAbsolutePath() - + " (" - + FileUtils.prettyFileSize(tmpFile) - + ")"); + logger.info( + "Wrote KNN entries of axis items to a temporary file for iterative training: " + + tmpFile.getAbsolutePath() + " (" + + FileUtils.prettyFileSize(tmpFile) + ")"); } // run iterations @@ -561,8 +561,8 @@ public class SlimUDTF extends UDTFWithOptions { try { bytesRead = dst.read(buf); } catch (IOException e) { - throw new HiveException("Failed to read a file: " - + dst.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to read a file: " + dst.getFile().getAbsolutePath(), e); } if (bytesRead == 0) { // reached file EOF break; @@ -593,13 +593,12 @@ public class SlimUDTF extends UDTFWithOptions { break; } } - logger.info("Performed " - + _cvState.getCurrentIteration() - + " iterations of " + logger.info("Performed " + _cvState.getCurrentIteration() + " iterations of " + NumberUtils.formatNumber(_observedTrainingExamples) + " training examples on memory and KNNi data on secondary storage (thus " - + NumberUtils.formatNumber(_observedTrainingExamples - * _cvState.getCurrentIteration()) + " training updates in total) "); + + NumberUtils.formatNumber( + _observedTrainingExamples * _cvState.getCurrentIteration()) + + " training updates in total) "); } } catch (Throwable e) { @@ -609,8 +608,8 @@ public class SlimUDTF extends UDTFWithOptions { try { dst.close(true); } catch (IOException e) { - throw new HiveException("Failed to close a file: " - + dst.getFile().getAbsolutePath(), e); + throw new HiveException( + "Failed to close a file: " + dst.getFile().getAbsolutePath(), e); } this._inputBuf = null; this._fileIO = null; @@ -722,15 +721,16 @@ public class SlimUDTF extends UDTFWithOptions { @Nonnull private static Int2FloatMap int2floatMap(final int item, @Nonnull final Map<?, ?> map, @Nonnull final PrimitiveObjectInspector keyOI, - @Nonnull final PrimitiveObjectInspector valueOI, @Nullable final FloatMatrix dataMatrix) { + @Nonnull final PrimitiveObjectInspector valueOI, + @Nullable final FloatMatrix dataMatrix) { return int2floatMap(item, map, keyOI, valueOI, dataMatrix, null); } @Nonnull private static Int2FloatMap int2floatMap(final int item, @Nonnull final Map<?, ?> map, @Nonnull final PrimitiveObjectInspector keyOI, - @Nonnull final PrimitiveObjectInspector valueOI, - @Nullable final FloatMatrix dataMatrix, @Nullable Int2FloatMap dst) { + @Nonnull final PrimitiveObjectInspector valueOI, @Nullable final FloatMatrix dataMatrix, + @Nullable Int2FloatMap dst) { if (dst == null) { dst = new Int2FloatOpenHashMap(map.size()); dst.defaultReturnValue(0.f); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/regression/AROWRegressionUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/regression/AROWRegressionUDTF.java b/core/src/main/java/hivemall/regression/AROWRegressionUDTF.java index 9a0978d..f393a3b 100644 --- a/core/src/main/java/hivemall/regression/AROWRegressionUDTF.java +++ b/core/src/main/java/hivemall/regression/AROWRegressionUDTF.java @@ -34,8 +34,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -@Description( - name = "train_arow_regr", +@Description(name = "train_arow_regr", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight, float covar>") public class AROWRegressionUDTF extends RegressionBaseUDTF { @@ -142,8 +141,7 @@ public class AROWRegressionUDTF extends RegressionBaseUDTF { return new WeightValueWithCovar(new_w, new_cov); } - @Description( - name = "train_arowe_regr", + @Description(name = "train_arowe_regr", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight, float covar>") public static class AROWe extends AROWRegressionUDTF { @@ -154,7 +152,8 @@ public class AROWRegressionUDTF extends RegressionBaseUDTF { @Override protected Options getOptions() { Options opts = super.getOptions(); - opts.addOption("e", "epsilon", true, "Sensitivity to prediction mistakes [default 0.1]"); + opts.addOption("e", "epsilon", true, + "Sensitivity to prediction mistakes [default 0.1]"); return opts; } @@ -200,8 +199,7 @@ public class AROWRegressionUDTF extends RegressionBaseUDTF { } } - @Description( - name = "train_arowe2_regr", + @Description(name = "train_arowe2_regr", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight, float covar>") public static class AROWe2 extends AROWe { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/regression/AdaDeltaUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/regression/AdaDeltaUDTF.java b/core/src/main/java/hivemall/regression/AdaDeltaUDTF.java index 81ff550..73edf37 100644 --- a/core/src/main/java/hivemall/regression/AdaDeltaUDTF.java +++ b/core/src/main/java/hivemall/regression/AdaDeltaUDTF.java @@ -40,8 +40,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; * @deprecated Use {@link hivemall.regression.GeneralRegressorUDTF} instead */ @Deprecated -@Description( - name = "train_adadelta_regr", +@Description(name = "train_adadelta_regr", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight>") public final class AdaDeltaUDTF extends RegressionBaseUDTF { @@ -131,11 +130,10 @@ public final class AdaDeltaUDTF extends RegressionBaseUDTF { } float new_scaled_sum_sq_grad = (decay * old_scaled_sum_sqgrad) + ((1.f - decay) * g_g); - float dx = (float) Math.sqrt((old_sum_squared_delta_x + eps) - / (old_scaled_sum_sqgrad * scaling + eps)) - * gradient; - float new_sum_squared_delta_x = (decay * old_sum_squared_delta_x) - + ((1.f - decay) * dx * dx); + float dx = (float) Math.sqrt( + (old_sum_squared_delta_x + eps) / (old_scaled_sum_sqgrad * scaling + eps)) * gradient; + float new_sum_squared_delta_x = + (decay * old_sum_squared_delta_x) + ((1.f - decay) * dx * dx); float new_w = old_w + (dx * xi); return new WeightValueParamsF2(new_w, new_scaled_sum_sq_grad, new_sum_squared_delta_x); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/regression/AdaGradUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/regression/AdaGradUDTF.java b/core/src/main/java/hivemall/regression/AdaGradUDTF.java index 9f01874..698ca6d 100644 --- a/core/src/main/java/hivemall/regression/AdaGradUDTF.java +++ b/core/src/main/java/hivemall/regression/AdaGradUDTF.java @@ -40,8 +40,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; * @deprecated Use {@link hivemall.regression.GeneralRegressorUDTF} instead */ @Deprecated -@Description( - name = "train_adagrad_regr", +@Description(name = "train_adagrad_regr", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight>") public final class AdaGradUDTF extends RegressionBaseUDTF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/regression/LogressUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/regression/LogressUDTF.java b/core/src/main/java/hivemall/regression/LogressUDTF.java index a5670df..4d9102a 100644 --- a/core/src/main/java/hivemall/regression/LogressUDTF.java +++ b/core/src/main/java/hivemall/regression/LogressUDTF.java @@ -34,8 +34,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; * @deprecated Use {@link hivemall.regression.GeneralRegressorUDTF} instead */ @Deprecated -@Description( - name = "logress", +@Description(name = "logress", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight>") public final class LogressUDTF extends RegressionBaseUDTF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/regression/PassiveAggressiveRegressionUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/regression/PassiveAggressiveRegressionUDTF.java b/core/src/main/java/hivemall/regression/PassiveAggressiveRegressionUDTF.java index ff5049a..c0e53f0 100644 --- a/core/src/main/java/hivemall/regression/PassiveAggressiveRegressionUDTF.java +++ b/core/src/main/java/hivemall/regression/PassiveAggressiveRegressionUDTF.java @@ -32,8 +32,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -@Description( - name = "train_pa1_regr", +@Description(name = "train_pa1_regr", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight>") public class PassiveAggressiveRegressionUDTF extends RegressionBaseUDTF { @@ -76,7 +75,8 @@ public class PassiveAggressiveRegressionUDTF extends RegressionBaseUDTF { if (opt_c != null) { c = Float.parseFloat(opt_c); if (!(c > 0.f)) { - throw new UDFArgumentException("Aggressiveness parameter C must be C > 0: " + c); + throw new UDFArgumentException( + "Aggressiveness parameter C must be C > 0: " + c); } } @@ -131,8 +131,7 @@ public class PassiveAggressiveRegressionUDTF extends RegressionBaseUDTF { return Math.min(c, eta); } - @Description( - name = "train_pa1a_regr", + @Description(name = "train_pa1a_regr", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight>") public static final class PA1a extends PassiveAggressiveRegressionUDTF { @@ -160,8 +159,7 @@ public class PassiveAggressiveRegressionUDTF extends RegressionBaseUDTF { } - @Description( - name = "train_pa2_regr", + @Description(name = "train_pa2_regr", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight>") public static class PA2 extends PassiveAggressiveRegressionUDTF { @@ -180,8 +178,7 @@ public class PassiveAggressiveRegressionUDTF extends RegressionBaseUDTF { } - @Description( - name = "train_pa2a_regr", + @Description(name = "train_pa2a_regr", value = "_FUNC_(array<int|bigint|string> features, float target [, constant string options])" + " - Returns a relation consists of <{int|bigint|string} feature, float weight>") public static final class PA2a extends PA2 { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/sketch/bloom/BloomAndUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/sketch/bloom/BloomAndUDF.java b/core/src/main/java/hivemall/sketch/bloom/BloomAndUDF.java index 9b029d4..87769da 100644 --- a/core/src/main/java/hivemall/sketch/bloom/BloomAndUDF.java +++ b/core/src/main/java/hivemall/sketch/bloom/BloomAndUDF.java @@ -30,8 +30,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.util.bloom.DynamicBloomFilter; import org.apache.hadoop.util.bloom.Filter; -@Description( - name = "bloom_and", +@Description(name = "bloom_and", value = "_FUNC_(string bloom1, string bloom2) - Returns the logical AND of two bloom filters") @UDFType(deterministic = true, stateful = false) public final class BloomAndUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/sketch/bloom/BloomContainsUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/sketch/bloom/BloomContainsUDF.java b/core/src/main/java/hivemall/sketch/bloom/BloomContainsUDF.java index 2aa7510..2da65b3 100644 --- a/core/src/main/java/hivemall/sketch/bloom/BloomContainsUDF.java +++ b/core/src/main/java/hivemall/sketch/bloom/BloomContainsUDF.java @@ -32,8 +32,7 @@ import org.apache.hadoop.util.bloom.DynamicBloomFilter; import org.apache.hadoop.util.bloom.Filter; import org.apache.hadoop.util.bloom.Key; -@Description( - name = "bloom_contains", +@Description(name = "bloom_contains", value = "_FUNC_(string bloom, string key) - Returns true if the bloom filter contains the given key") @UDFType(deterministic = true, stateful = false) public final class BloomContainsUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/sketch/bloom/BloomOrUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/sketch/bloom/BloomOrUDF.java b/core/src/main/java/hivemall/sketch/bloom/BloomOrUDF.java index 7f60be4..7d2980e 100644 --- a/core/src/main/java/hivemall/sketch/bloom/BloomOrUDF.java +++ b/core/src/main/java/hivemall/sketch/bloom/BloomOrUDF.java @@ -30,8 +30,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.util.bloom.DynamicBloomFilter; import org.apache.hadoop.util.bloom.Filter; -@Description( - name = "bloom_or", +@Description(name = "bloom_or", value = "_FUNC_(string bloom1, string bloom2) - Returns the logical OR of two bloom filters") @UDFType(deterministic = true, stateful = false) public final class BloomOrUDF extends UDF { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/sketch/hll/ApproxCountDistinctUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/sketch/hll/ApproxCountDistinctUDAF.java b/core/src/main/java/hivemall/sketch/hll/ApproxCountDistinctUDAF.java index 2575026..f5e6c05 100644 --- a/core/src/main/java/hivemall/sketch/hll/ApproxCountDistinctUDAF.java +++ b/core/src/main/java/hivemall/sketch/hll/ApproxCountDistinctUDAF.java @@ -55,7 +55,8 @@ import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus; public final class ApproxCountDistinctUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) + throws SemanticException { if (typeInfo.length != 1 && typeInfo.length != 2) { throw new UDFArgumentTypeException(typeInfo.length - 1, "_FUNC_ takes one or two arguments"); @@ -162,7 +163,8 @@ public final class ApproxCountDistinctUDAF extends AbstractGenericUDAFResolver { } HLLBuffer buf = (HLLBuffer) agg; - Object value = ObjectInspectorUtils.copyToStandardJavaObject(parameters[0], origInputOI); + Object value = + ObjectInspectorUtils.copyToStandardJavaObject(parameters[0], origInputOI); Preconditions.checkNotNull(buf.hll, HiveException.class); buf.hll.offer(value); } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/classification/DecisionTree.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/classification/DecisionTree.java b/core/src/main/java/hivemall/smile/classification/DecisionTree.java index 76164f4..cc92ae8 100644 --- a/core/src/main/java/hivemall/smile/classification/DecisionTree.java +++ b/core/src/main/java/hivemall/smile/classification/DecisionTree.java @@ -270,8 +270,8 @@ public final class DecisionTree implements Classifier<Vector> { return falseChild.predict(x); } } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } } } @@ -296,8 +296,8 @@ public final class DecisionTree implements Classifier<Vector> { falseChild.predict(x, handler); } } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } } } @@ -339,8 +339,8 @@ public final class DecisionTree implements Classifier<Vector> { .append(" ) {\n"); } } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } trueChild.exportJavascript(builder, featureNames, classNames, depth + 1); indent(builder, depth); @@ -363,42 +363,48 @@ public final class DecisionTree implements Classifier<Vector> { // http://www.graphviz.org/doc/info/attrs.html#k:colorList String hsvColor = (colorBrew == null || output >= colorBrew.length) ? "#00000000" : String.format("%.4f,1.000,1.000", colorBrew[output]); - builder.append(String.format( - " %d [label=<%s = %s>, fillcolor=\"%s\", shape=ellipse];\n", myNodeId, - outputName, resolveName(output, classNames), hsvColor)); + builder.append( + String.format(" %d [label=<%s = %s>, fillcolor=\"%s\", shape=ellipse];\n", + myNodeId, outputName, resolveName(output, classNames), hsvColor)); if (myNodeId != parentNodeId) { builder.append(' ').append(parentNodeId).append(" -> ").append(myNodeId); if (parentNodeId == 0) { if (myNodeId == 1) { - builder.append(" [labeldistance=2.5, labelangle=45, headlabel=\"True\"]"); + builder.append( + " [labeldistance=2.5, labelangle=45, headlabel=\"True\"]"); } else { - builder.append(" [labeldistance=2.5, labelangle=-45, headlabel=\"False\"]"); + builder.append( + " [labeldistance=2.5, labelangle=-45, headlabel=\"False\"]"); } } builder.append(";\n"); } } else { if (splitFeatureType == AttributeType.NOMINAL) { - builder.append(String.format( - " %d [label=<%s = %s>, fillcolor=\"#00000000\"];\n", myNodeId, - resolveFeatureName(splitFeature, featureNames), Double.toString(splitValue))); + builder.append( + String.format(" %d [label=<%s = %s>, fillcolor=\"#00000000\"];\n", myNodeId, + resolveFeatureName(splitFeature, featureNames), + Double.toString(splitValue))); } else if (splitFeatureType == AttributeType.NUMERIC) { - builder.append(String.format( - " %d [label=<%s ≤ %s>, fillcolor=\"#00000000\"];\n", myNodeId, - resolveFeatureName(splitFeature, featureNames), Double.toString(splitValue))); + builder.append( + String.format(" %d [label=<%s ≤ %s>, fillcolor=\"#00000000\"];\n", + myNodeId, resolveFeatureName(splitFeature, featureNames), + Double.toString(splitValue))); } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } if (myNodeId != parentNodeId) { builder.append(' ').append(parentNodeId).append(" -> ").append(myNodeId); if (parentNodeId == 0) {//only draw edge label on top if (myNodeId == 1) { - builder.append(" [labeldistance=2.5, labelangle=45, headlabel=\"True\"]"); + builder.append( + " [labeldistance=2.5, labelangle=45, headlabel=\"True\"]"); } else { - builder.append(" [labeldistance=2.5, labelangle=-45, headlabel=\"False\"]"); + builder.append( + " [labeldistance=2.5, labelangle=-45, headlabel=\"False\"]"); } } builder.append(";\n"); @@ -458,8 +464,8 @@ public final class DecisionTree implements Classifier<Vector> { int falseDepth = falseChild.opCodegen(scripts, depth + trueDepth); selfDepth += falseDepth; } else { - throw new IllegalStateException("Unsupported attribute type: " - + splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + splitFeatureType); } } return selfDepth; @@ -605,12 +611,12 @@ public final class DecisionTree implements Classifier<Vector> { final double impurity = impurity(count, numSamples, _rule); - final int[] samples = _hasNumericType ? SmileExtUtils.bagsToSamples(bags, x.numRows()) - : null; + final int[] samples = + _hasNumericType ? SmileExtUtils.bagsToSamples(bags, x.numRows()) : null; final int[] falseCount = new int[_k]; for (int varJ : variableIndex(x, bags)) { - final Node split = findBestSplit(numSamples, count, falseCount, impurity, varJ, - samples); + final Node split = + findBestSplit(numSamples, count, falseCount, impurity, varJ, samples); if (split.splitScore > node.splitScore) { node.splitFeature = split.splitFeature; node.splitFeatureType = split.splitFeatureType; @@ -707,9 +713,9 @@ public final class DecisionTree implements Classifier<Vector> { falseCount[q] = count[q] - trueCount[l][q]; } - final double gain = impurity - (double) tc / n - * impurity(trueCount[l], tc, _rule) - (double) fc / n - * impurity(falseCount, fc, _rule); + final double gain = + impurity - (double) tc / n * impurity(trueCount[l], tc, _rule) + - (double) fc / n * impurity(falseCount, fc, _rule); if (gain > splitNode.splitScore) { // new best split @@ -762,9 +768,9 @@ public final class DecisionTree implements Classifier<Vector> { falseCount[l] = count[l] - trueCount[l]; } - final double gain = impurity - (double) tc / n - * impurity(trueCount, tc, _rule) - (double) fc / n - * impurity(falseCount, fc, _rule); + final double gain = + impurity - (double) tc / n * impurity(trueCount, tc, _rule) + - (double) fc / n * impurity(falseCount, fc, _rule); if (gain > splitNode.splitScore) { // new best split @@ -782,8 +788,8 @@ public final class DecisionTree implements Classifier<Vector> { }//apply() }); } else { - throw new IllegalStateException("Unsupported attribute type: " - + _attributes[j].type); + throw new IllegalStateException( + "Unsupported attribute type: " + _attributes[j].type); } return splitNode; @@ -822,7 +828,8 @@ public final class DecisionTree implements Classifier<Vector> { } node.trueChild = new Node(node.trueChildOutput, trueChildPosteriori); - TrainNode trueChild = new TrainNode(node.trueChild, x, y, trueBags.toArray(), depth + 1); + TrainNode trueChild = + new TrainNode(node.trueChild, x, y, trueBags.toArray(), depth + 1); trueBags = null; // help GC for recursive call if (tc >= _minSplit && trueChild.findBestSplit()) { if (nextSplits != null) { @@ -833,8 +840,8 @@ public final class DecisionTree implements Classifier<Vector> { } node.falseChild = new Node(node.falseChildOutput, falseChildPosteriori); - TrainNode falseChild = new TrainNode(node.falseChild, x, y, falseBags.toArray(), - depth + 1); + TrainNode falseChild = + new TrainNode(node.falseChild, x, y, falseBags.toArray(), depth + 1); falseBags = null; // help GC for recursive call if (fc >= _minSplit && falseChild.findBestSplit()) { if (nextSplits != null) { @@ -888,8 +895,8 @@ public final class DecisionTree implements Classifier<Vector> { } } } else { - throw new IllegalStateException("Unsupported attribute type: " - + node.splitFeatureType); + throw new IllegalStateException( + "Unsupported attribute type: " + node.splitFeatureType); } return tc; } @@ -986,8 +993,8 @@ public final class DecisionTree implements Classifier<Vector> { this._attributes = SmileExtUtils.attributeTypes(attributes, x); if (attributes.length != x.numColumns()) { - throw new IllegalArgumentException("-attrs option is invalid: " - + Arrays.toString(attributes)); + throw new IllegalArgumentException( + "-attrs option is invalid: " + Arrays.toString(attributes)); } this._hasNumericType = SmileExtUtils.containsNumericType(_attributes); @@ -1054,8 +1061,8 @@ public final class DecisionTree implements Classifier<Vector> { private static void checkArgument(@Nonnull Matrix x, @Nonnull int[] y, int numVars, int maxDepth, int maxLeafs, int minSplits, int minLeafSize) { if (x.numRows() != y.length) { - throw new IllegalArgumentException(String.format( - "The sizes of X and Y don't match: %d != %d", x.numRows(), y.length)); + throw new IllegalArgumentException( + String.format("The sizes of X and Y don't match: %d != %d", x.numRows(), y.length)); } if (numVars <= 0 || numVars > x.numColumns()) { throw new IllegalArgumentException( @@ -1073,7 +1080,8 @@ public final class DecisionTree implements Classifier<Vector> { + minSplits); } if (minLeafSize < 1) { - throw new IllegalArgumentException("Invalid minimum size of leaf nodes: " + minLeafSize); + throw new IllegalArgumentException( + "Invalid minimum size of leaf nodes: " + minLeafSize); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/c4036695/core/src/main/java/hivemall/smile/classification/GradientTreeBoostingClassifierUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/classification/GradientTreeBoostingClassifierUDTF.java b/core/src/main/java/hivemall/smile/classification/GradientTreeBoostingClassifierUDTF.java index d94dc4c..1a4fa1c 100644 --- a/core/src/main/java/hivemall/smile/classification/GradientTreeBoostingClassifierUDTF.java +++ b/core/src/main/java/hivemall/smile/classification/GradientTreeBoostingClassifierUDTF.java @@ -164,8 +164,8 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { maxDepth = Primitives.parseInt(cl.getOptionValue("max_depth"), maxDepth); maxLeafs = Primitives.parseInt(cl.getOptionValue("max_leaf_nodes"), maxLeafs); minSplit = Primitives.parseInt(cl.getOptionValue("min_split"), minSplit); - minSamplesLeaf = Primitives.parseInt(cl.getOptionValue("min_samples_leaf"), - minSamplesLeaf); + minSamplesLeaf = + Primitives.parseInt(cl.getOptionValue("min_samples_leaf"), minSamplesLeaf); seed = Primitives.parseLong(cl.getOptionValue("seed"), seed); attrs = SmileExtUtils.resolveAttributes(cl.getOptionValue("attribute_types")); } @@ -187,10 +187,9 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { @Override public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 2 && argOIs.length != 3) { - throw new UDFArgumentException( - getClass().getSimpleName() - + " takes 2 or 3 arguments: array<double|string> features, int label [, const string options]: " - + argOIs.length); + throw new UDFArgumentException(getClass().getSimpleName() + + " takes 2 or 3 arguments: array<double|string> features, int label [, const string options]: " + + argOIs.length); } ListObjectInspector listOI = HiveUtils.asListOI(argOIs[0]); @@ -206,7 +205,8 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { this.matrixBuilder = new CSRMatrixBuilder(8192); } else { throw new UDFArgumentException( - "_FUNC_ takes double[] or string[] for the first argument: " + listOI.getTypeName()); + "_FUNC_ takes double[] or string[] for the first argument: " + + listOI.getTypeName()); } this.labelOI = HiveUtils.asIntCompatibleOI(argOIs[1]); @@ -220,13 +220,15 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { fieldNames.add("iteration"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); fieldNames.add("pred_models"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableStringObjectInspector)); fieldNames.add("intercept"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); fieldNames.add("shrinkage"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); fieldNames.add("var_importance"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); fieldNames.add("oob_error_rate"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableFloatObjectInspector); @@ -271,8 +273,9 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { @Override public void close() throws HiveException { this._progressReporter = getReporter(); - this._iterationCounter = (_progressReporter == null) ? null : _progressReporter.getCounter( - "hivemall.smile.GradientTreeBoostingClassifier$Counter", "iteration"); + this._iterationCounter = (_progressReporter == null) ? null + : _progressReporter.getCounter( + "hivemall.smile.GradientTreeBoostingClassifier$Counter", "iteration"); reportProgress(_progressReporter); if (!labels.isEmpty()) { @@ -314,8 +317,8 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { private void train(@Nonnull Matrix x, @Nonnull final int[] y) throws HiveException { final int numRows = x.numRows(); if (numRows != y.length) { - throw new HiveException(String.format("The sizes of X and Y don't match: %d != %d", - numRows, y.length)); + throw new HiveException( + String.format("The sizes of X and Y don't match: %d != %d", numRows, y.length)); } checkOptions(); this._attributes = SmileExtUtils.attributeTypes(_attributes, x); @@ -404,7 +407,8 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { // out-of-bag error estimate int oobTests = 0, oobErrors = 0; - for (int i = sampled.nextClearBit(0); i < numInstances; i = sampled.nextClearBit(i + 1)) { + for (int i = sampled.nextClearBit(0); i < numInstances; i = + sampled.nextClearBit(i + 1)) { oobTests++; final int pred = (h[i] > 0.d) ? 1 : 0; if (pred != y[i]) { @@ -428,9 +432,9 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { final int numVars = SmileExtUtils.computeNumInputVars(_numVars, x); if (logger.isInfoEnabled()) { logger.info("k: " + k + ", numTrees: " + _numTrees + ", shrinkage: " + _eta - + ", subsample: " + _subsample + ", numVars: " + numVars - + ", minSamplesSplit: " + _minSamplesSplit + ", maxDepth: " + _maxDepth - + ", maxLeafs: " + _maxLeafNodes + ", seed: " + _seed); + + ", subsample: " + _subsample + ", numVars: " + numVars + ", minSamplesSplit: " + + _minSamplesSplit + ", maxDepth: " + _maxDepth + ", maxLeafs: " + _maxLeafNodes + + ", seed: " + _seed); } final int numInstances = x.numRows(); @@ -522,10 +526,11 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { } } - }// for each k + } // for each k // out-of-bag error estimate - for (int i = sampled.nextClearBit(0); i < numInstances; i = sampled.nextClearBit(i + 1)) { + for (int i = sampled.nextClearBit(0); i < numInstances; i = + sampled.nextClearBit(i + 1)) { oobTests++; if (prediction[i] != y[i]) { oobErrors++; @@ -540,7 +545,7 @@ public final class GradientTreeBoostingClassifierUDTF extends UDTFWithOptions { // forward a row forward(m + 1, 0.d, _eta, oobErrorRate, trees); - }// for each m + } // for each m } /**
