[HIVEMALL-191] Add Kryo serialization test to existing workaround code

## What changes were proposed in this pull request?
Add Kryo serialization test to existing workaround code as: https://github.com/apache/incubator-hivemall/commit/f6765dff7be67e1a3327709bbb9bfdc6eba7b97f To be more precise, currently two UDFs `quantified_features` and `tokenize_ja` explicitly have the workaround lazy instantiation code. So, this PR makes their `transient` keyword unnecessary. ## What type of PR is it? Improvement ## What is the Jira issue? https://issues.apache.org/jira/browse/HIVEMALL-191 ## How was this patch tested? Added some unit tests, and manually tested as well ## Checklist - [x] Did you apply source code formatter, i.e., `mvn formatter:format`, for your commit? - [x] Did you run system tests on Hive (or Spark)? Author: Takuya Kitazawa <[email protected]> Closes #145 from takuti/HIVEMALL-191. Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/b64b94f9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/b64b94f9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/b64b94f9 Branch: refs/heads/master Commit: b64b94f92b27886bb8e36b27e5463be5fda1e52f Parents: 36fb839 Author: Takuya Kitazawa <[email protected]> Authored: Wed Apr 25 17:04:02 2018 +0900 Committer: Makoto Yui <[email protected]> Committed: Wed Apr 25 17:04:02 2018 +0900 ---------------------------------------------------------------------- .../ftvec/trans/QuantifiedFeaturesUDTF.java | 33 ++++---- core/src/test/java/hivemall/TestUtils.java | 70 ++++++++++++++++ .../classifier/GeneralClassifierUDTFTest.java | 12 +++ ...ernelExpansionPassiveAggressiveUDTFTest.java | 12 +++ .../classifier/PassiveAggressiveUDTFTest.java | 23 +++++ .../hivemall/classifier/PerceptronUDTFTest.java | 15 ++++ .../fm/FactorizationMachineUDTFTest.java | 15 ++++ .../FieldAwareFactorizationMachineUDTFTest.java | 15 ++++ .../java/hivemall/ftvec/FeatureUDFTest.java | 13 +++ .../ftvec/hashing/FeatureHashingUDFTest.java | 20 +++++ 
.../ftvec/trans/QuantifiedFeaturesUDTFTest.java | 88 ++++++++++++++++++++ .../ftvec/trans/TestBinarizeLabelUDTF.java | 15 ++++ .../ftvec/trans/VectorizeFeaturesUDFTest.java | 27 ++++-- .../geospatial/HaversineDistanceUDFTest.java | 20 +++++ .../hivemall/geospatial/Lat2TileYUDFTest.java | 9 ++ .../hivemall/geospatial/Lon2TileXUDFTest.java | 9 ++ .../hivemall/geospatial/TileX2LonUDFTest.java | 8 ++ .../hivemall/geospatial/TileY2LatUDFTest.java | 8 ++ .../knn/distance/EuclidDistanceUDFTest.java | 16 ++++ .../knn/similarity/CosineSimilarityUDFTest.java | 15 ++++ .../knn/similarity/DIMSUMMapperUDTFTest.java | 24 ++++++ .../mf/BPRMatrixFactorizationUDTFTest.java | 14 ++++ .../mf/MatrixFactorizationAdaGradUDTFTest.java | 14 ++++ .../mf/MatrixFactorizationSGDUDTFTest.java | 14 ++++ .../java/hivemall/recommend/SlimUDTFTest.java | 72 ++++++++++++++++ .../hivemall/regression/AdaGradUDTFTest.java | 14 ++++ .../regression/GeneralRegressorUDTFTest.java | 16 +++- .../RandomForestClassifierUDTFTest.java | 35 ++++++++ .../smile/tools/TreePredictUDFTest.java | 50 +++++++++++ .../smile/tools/TreePredictUDFv1Test.java | 49 +++++++++++ .../statistics/MovingAverageUDTFTest.java | 12 +++ .../java/hivemall/tools/TryCastUDFTest.java | 14 ++++ .../tools/array/ArrayAppendUDFTest.java | 14 +++- .../tools/array/ArrayElementAtUDFTest.java | 12 +++ .../tools/array/ArrayFlattenUDFTest.java | 10 +++ .../hivemall/tools/array/ArraySliceUDFTest.java | 14 ++++ .../hivemall/tools/array/ArrayUnionUDFTest.java | 11 +++ .../tools/array/ConditionalEmitUDTFTest.java | 12 +++ .../tools/array/FirstElementUDFTest.java | 10 +++ .../tools/array/LastElementUDFTest.java | 10 +++ .../tools/array/SelectKBestUDFTest.java | 24 ++++-- .../hivemall/tools/json/FromJsonUDFTest.java | 11 +++ .../java/hivemall/tools/json/ToJsonUDFTest.java | 13 +++ .../hivemall/tools/vector/VectorAddUDFTest.java | 11 +++ .../hivemall/tools/vector/VectorDotUDFTest.java | 11 +++ .../java/hivemall/topicmodel/LDAUDTFTest.java | 16 ++++ 
.../java/hivemall/topicmodel/PLSAUDTFTest.java | 16 ++++ .../hivemall/nlp/tokenizer/KuromojiUDF.java | 83 +++++++++--------- .../java/hivemall/nlp/tokenizer/SmartcnUDF.java | 3 +- nlp/src/test/java/hivemall/TestUtils.java | 55 ++++++++++++ .../hivemall/nlp/tokenizer/KuromojiUDFTest.java | 47 ++++++++--- .../hivemall/nlp/tokenizer/SmartcnUDFTest.java | 31 +++++++ 52 files changed, 1101 insertions(+), 84 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java b/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java index 9299976..5b2eefe 100644 --- a/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java +++ b/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java @@ -48,8 +48,7 @@ public final class QuantifiedFeaturesUDTF extends GenericUDTF { private Identifier<String>[] identifiers; private DoubleWritable[] columnValues; - // lazy instantiation to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.NullPointerException - private transient Object[] forwardObjs; + private Object[] forwardObjs; @SuppressWarnings("unchecked") @Override @@ -63,20 +62,21 @@ public final class QuantifiedFeaturesUDTF extends GenericUDTF { int outputSize = size - 1; this.doubleOIs = new PrimitiveObjectInspector[outputSize]; - this.columnValues = new DoubleWritable[outputSize]; this.identifiers = new Identifier[outputSize]; - this.forwardObjs = null; + this.columnValues = new DoubleWritable[outputSize]; for (int i = 0; i < outputSize; i++) { - columnValues[i] = new DoubleWritable(Double.NaN); ObjectInspector argOI = argOIs[i + 1]; if (HiveUtils.isNumberOI(argOI)) { doubleOIs[i] = HiveUtils.asDoubleCompatibleOI(argOI); 
} else { identifiers[i] = new Identifier<String>(); } + columnValues[i] = new DoubleWritable(Double.NaN); } + this.forwardObjs = null; + List<String> fieldNames = new ArrayList<String>(outputSize); List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(outputSize); fieldNames.add("features"); @@ -87,32 +87,37 @@ public final class QuantifiedFeaturesUDTF extends GenericUDTF { @Override public void process(Object[] args) throws HiveException { - if (forwardObjs == null) { - this.forwardObjs = new Object[] {Arrays.asList(columnValues)}; - } - + int outputSize = args.length - 1; boolean outputRow = boolOI.get(args[0]); if (outputRow) { - final DoubleWritable[] values = this.columnValues; - for (int i = 0, outputSize = args.length - 1; i < outputSize; i++) { + if (forwardObjs == null) { + // forwardObjs internally references columnValues + List<DoubleWritable> column = new ArrayList<>(outputSize); + this.forwardObjs = new Object[] {column}; + for (int i = 0; i < outputSize; i++) { + column.add(columnValues[i]); + } + } + // updating columnValues simultaneously changes forwardObjs + for (int i = 0; i < outputSize; i++) { Object arg = args[i + 1]; Identifier<String> identifier = identifiers[i]; if (identifier == null) { double v = PrimitiveObjectInspectorUtils.getDouble(arg, doubleOIs[i]); - values[i].set(v); + columnValues[i].set(v); } else { if (arg == null) { throw new HiveException("Found Null in the input: " + Arrays.toString(args)); } else { String k = arg.toString(); int id = identifier.valueOf(k); - values[i].set(id); + columnValues[i].set(id); } } } forward(forwardObjs); } else {// load only - for (int i = 0, outputSize = args.length - 1; i < outputSize; i++) { + for (int i = 0; i < outputSize; i++) { Identifier<String> identifier = identifiers[i]; if (identifier != null) { Object arg = args[i + 1]; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/TestUtils.java 
---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/TestUtils.java b/core/src/test/java/hivemall/TestUtils.java index 12d921e..7bd1ac3 100644 --- a/core/src/test/java/hivemall/TestUtils.java +++ b/core/src/test/java/hivemall/TestUtils.java @@ -19,16 +19,86 @@ package hivemall; import java.io.ByteArrayOutputStream; +import java.io.IOException; import javax.annotation.Nonnull; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.Collector; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hive.com.esotericsoftware.kryo.Kryo; import org.apache.hive.com.esotericsoftware.kryo.io.Input; import org.apache.hive.com.esotericsoftware.kryo.io.Output; public final class TestUtils { + public static <T extends GenericUDF> void testGenericUDFSerialization(@Nonnull Class<T> clazz, + @Nonnull ObjectInspector[] ois, @Nonnull Object[] row) throws HiveException, + IOException { + final T udf; + try { + udf = clazz.newInstance(); + } catch (InstantiationException | IllegalAccessException e) { + throw new HiveException(e); + } + + udf.initialize(ois); + + // serialization after initialization + byte[] serialized = serializeObjectByKryo(udf); + deserializeObjectByKryo(serialized, clazz); + + int size = row.length; + GenericUDF.DeferredObject[] rowDeferred = new GenericUDF.DeferredObject[size]; + for (int i = 0; i < size; i++) { + rowDeferred[i] = new GenericUDF.DeferredJavaObject(row[i]); + } + + udf.evaluate(rowDeferred); + + // serialization after evaluating row + serialized = serializeObjectByKryo(udf); + TestUtils.deserializeObjectByKryo(serialized, clazz); + + udf.close(); + } + + public static <T extends GenericUDTF> void testGenericUDTFSerialization( + @Nonnull Class<T> 
clazz, @Nonnull ObjectInspector[] ois, @Nonnull Object[][] rows) + throws HiveException { + final T udtf; + try { + udtf = clazz.newInstance(); + } catch (InstantiationException | IllegalAccessException e) { + throw new HiveException(e); + } + + udtf.initialize(ois); + + // serialization after initialization + byte[] serialized = serializeObjectByKryo(udtf); + deserializeObjectByKryo(serialized, clazz); + + udtf.setCollector(new Collector() { + public void collect(Object input) throws HiveException { + // noop + } + }); + + for (Object[] row : rows) { + udtf.process(row); + } + + // serialization after processing row + serialized = serializeObjectByKryo(udtf); + TestUtils.deserializeObjectByKryo(serialized, clazz); + + udtf.close(); + } + @Nonnull public static byte[] serializeObjectByKryo(@Nonnull Object obj) { Kryo kryo = getKryo(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java b/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java index dba4a00..9b0885d 100644 --- a/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java +++ b/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java @@ -21,6 +21,8 @@ package hivemall.classifier; import static hivemall.utils.hadoop.HiveUtils.lazyInteger; import static hivemall.utils.hadoop.HiveUtils.lazyLong; import static hivemall.utils.hadoop.HiveUtils.lazyString; + +import hivemall.TestUtils; import hivemall.utils.math.MathUtils; import java.io.BufferedReader; @@ -368,6 +370,16 @@ public class GeneralClassifierUDTFTest { Assert.assertTrue(accuracy > 0.8f); } + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + GeneralClassifierUDTF.class, + new ObjectInspector[] { + 
ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, new Object[][] {{ + Arrays.asList("1:-2", "2:-1"), 0}}); + } + private static void println(String msg) { if (DEBUG) { System.out.println(msg); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTFTest.java b/core/src/test/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTFTest.java index 1af0ebc..ec5a382 100644 --- a/core/src/test/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTFTest.java +++ b/core/src/test/java/hivemall/classifier/KernelExpansionPassiveAggressiveUDTFTest.java @@ -18,6 +18,7 @@ */ package hivemall.classifier; +import hivemall.TestUtils; import hivemall.model.FeatureValue; import hivemall.utils.math.MathUtils; @@ -27,6 +28,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.text.ParseException; import java.util.ArrayList; +import java.util.Arrays; import java.util.StringTokenizer; import java.util.zip.GZIPInputStream; @@ -146,6 +148,16 @@ public class KernelExpansionPassiveAggressiveUDTFTest { Assert.assertTrue(accuracy > 0.82f); } + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + KernelExpansionPassiveAggressiveUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, new Object[][] {{ + Arrays.asList("1:-2", "2:-1"), 0}}); + } + @Nonnull private static BufferedReader readFile(@Nonnull String fileName) throws IOException { InputStream is = 
KernelExpansionPassiveAggressiveUDTFTest.class.getResourceAsStream(fileName); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/classifier/PassiveAggressiveUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/classifier/PassiveAggressiveUDTFTest.java b/core/src/test/java/hivemall/classifier/PassiveAggressiveUDTFTest.java index 2322c07..721b777 100644 --- a/core/src/test/java/hivemall/classifier/PassiveAggressiveUDTFTest.java +++ b/core/src/test/java/hivemall/classifier/PassiveAggressiveUDTFTest.java @@ -19,9 +19,12 @@ package hivemall.classifier; import static org.junit.Assert.assertEquals; + +import hivemall.TestUtils; import hivemall.model.PredictionResult; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -221,4 +224,24 @@ public class PassiveAggressiveUDTFTest { assertEquals(expectedLearningRate2, udtf.eta(loss, margin2), 1e-5f); } + @Test + public void testPA1Serialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + PassiveAggressiveUDTF.PA1.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, new Object[][] {{ + Arrays.asList("1:-2", "2:-1"), 0}}); + } + + @Test + public void testPA2Serialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + PassiveAggressiveUDTF.PA2.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, new Object[][] {{ + Arrays.asList("1:-2", "2:-1"), 0}}); + } + } 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/classifier/PerceptronUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/classifier/PerceptronUDTFTest.java b/core/src/test/java/hivemall/classifier/PerceptronUDTFTest.java index e04adb7..6d3dd12 100644 --- a/core/src/test/java/hivemall/classifier/PerceptronUDTFTest.java +++ b/core/src/test/java/hivemall/classifier/PerceptronUDTFTest.java @@ -19,9 +19,12 @@ package hivemall.classifier; import static org.junit.Assert.assertEquals; + +import hivemall.TestUtils; import hivemall.model.FeatureValue; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -30,6 +33,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.junit.Test; +import java.util.Arrays; + public class PerceptronUDTFTest { @Test @@ -118,4 +123,14 @@ public class PerceptronUDTFTest { assertEquals(-1.f, udtf.model.get(word3.getFeature()).get(), 1e-5f); assertEquals(0.f, udtf.model.get(word4.getFeature()).get(), 1e-5f); } + + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + PerceptronUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, new Object[][] {{ + Arrays.asList("1:-2", "2:-1"), 0}}); + } } 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/fm/FactorizationMachineUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/fm/FactorizationMachineUDTFTest.java b/core/src/test/java/hivemall/fm/FactorizationMachineUDTFTest.java index 479f5cf..fb2b4a2 100644 --- a/core/src/test/java/hivemall/fm/FactorizationMachineUDTFTest.java +++ b/core/src/test/java/hivemall/fm/FactorizationMachineUDTFTest.java @@ -23,12 +23,14 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.StringTokenizer; import java.util.zip.GZIPInputStream; import javax.annotation.Nonnull; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -102,6 +104,19 @@ public class FactorizationMachineUDTFTest { Assert.assertTrue(udtf._params.l2norm); } + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + FactorizationMachineUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + "-factors 5 -min 1 -max 5 -iters 1 -init_v gaussian -eta0 0.01 -seed 31")}, + new Object[][] {{Arrays.asList("1:-2", "2:-1"), 1.0}}); + } + @Nonnull private static BufferedReader readFile(@Nonnull String fileName) throws IOException { InputStream is = FactorizationMachineUDTFTest.class.getResourceAsStream(fileName); 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/fm/FieldAwareFactorizationMachineUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/fm/FieldAwareFactorizationMachineUDTFTest.java b/core/src/test/java/hivemall/fm/FieldAwareFactorizationMachineUDTFTest.java index 585392b..604b355 100644 --- a/core/src/test/java/hivemall/fm/FieldAwareFactorizationMachineUDTFTest.java +++ b/core/src/test/java/hivemall/fm/FieldAwareFactorizationMachineUDTFTest.java @@ -18,6 +18,7 @@ */ package hivemall.fm; +import hivemall.TestUtils; import hivemall.utils.lang.NumberUtils; import java.io.BufferedReader; @@ -26,6 +27,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.zip.GZIPInputStream; @@ -212,6 +214,19 @@ public class FieldAwareFactorizationMachineUDTFTest { Assert.assertTrue("Last loss was greater than expected: " + loss, loss < lossThreshold); } + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + FieldAwareFactorizationMachineUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + "-opt sgd -classification -factors 10 -w0 -seed 43")}, new Object[][] {{ + Arrays.asList("0:1:-2", "1:2:-1"), 1.0}}); + } + @Nonnull private static BufferedReader readFile(@Nonnull String fileName) throws IOException { InputStream is; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java ---------------------------------------------------------------------- 
diff --git a/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java b/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java index 55dd803..17a937e 100644 --- a/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java +++ b/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java @@ -18,7 +18,9 @@ */ package hivemall.ftvec; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -29,6 +31,8 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import java.io.IOException; + public class FeatureUDFTest { FeatureUDF udf = null; @@ -225,4 +229,13 @@ public class FeatureUDFTest { Assert.assertNull(ret); } + + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization(FeatureUDF.class, new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector}, new Object[] {"f1", + 2.5d}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/ftvec/hashing/FeatureHashingUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/ftvec/hashing/FeatureHashingUDFTest.java b/core/src/test/java/hivemall/ftvec/hashing/FeatureHashingUDFTest.java index 061a971..82b5cde 100644 --- a/core/src/test/java/hivemall/ftvec/hashing/FeatureHashingUDFTest.java +++ b/core/src/test/java/hivemall/ftvec/hashing/FeatureHashingUDFTest.java @@ -18,11 +18,20 @@ */ package hivemall.ftvec.hashing; +import hivemall.TestUtils; import hivemall.utils.hashing.MurmurHash3; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.junit.Assert; import org.junit.Test; +import java.io.IOException; +import java.util.Arrays; + public class FeatureHashingUDFTest { @Test @@ -41,4 +50,15 @@ public class FeatureHashingUDFTest { FeatureHashingUDF.mhash("0", MurmurHash3.DEFAULT_NUM_FEATURES) + ":1.1", actual); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + FeatureHashingUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-features 1")}, + new Object[] {Arrays.asList("aaa#xxx", "bbb:10")}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTFTest.java b/core/src/test/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTFTest.java new file mode 100644 index 0000000..4cfc20e --- /dev/null +++ b/core/src/test/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTFTest.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.ftvec.trans; + +import hivemall.TestUtils; +import hivemall.utils.hadoop.WritableUtils; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +import org.apache.hadoop.hive.ql.udf.generic.Collector; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +public class QuantifiedFeaturesUDTFTest { + + @Test + public void test() throws HiveException { + final QuantifiedFeaturesUDTF udtf = new QuantifiedFeaturesUDTF(); + + udtf.initialize(new ObjectInspector[] { + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, true), + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector}); + + final List<List<Double>> quantifiedInputs = new ArrayList<>(); + udtf.setCollector(new Collector() { + public void collect(Object input) throws HiveException { + Object[] row = (Object[]) input; + List<DoubleWritable> column = (List<DoubleWritable>) row[0]; + List<Double> quantifiedInput = new ArrayList<>(); + for (DoubleWritable elem : column) { + quantifiedInput.add(elem.get()); + } + quantifiedInputs.add(quantifiedInput); + } + }); + + udtf.process(new Object[] 
{WritableUtils.val(true), "aaa", 1.0}); + udtf.process(new Object[] {WritableUtils.val(true), "bbb", 2.0}); + + udtf.close(); + + Assert.assertEquals(2, quantifiedInputs.size()); + + List<Double> quantifiedInput = quantifiedInputs.get(0); + Assert.assertTrue(quantifiedInput.get(0) == 0.d); + Assert.assertTrue(quantifiedInput.get(1) == 1.d); + + quantifiedInput = quantifiedInputs.get(1); + Assert.assertTrue(quantifiedInput.get(0) == 1.d); + Assert.assertTrue(quantifiedInput.get(1) == 2.d); + } + + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + QuantifiedFeaturesUDTF.class, + new ObjectInspector[] { + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, true), + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector}, new Object[][] {{ + WritableUtils.val(true), "aaa", 1.0}}); + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/ftvec/trans/TestBinarizeLabelUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/ftvec/trans/TestBinarizeLabelUDTF.java b/core/src/test/java/hivemall/ftvec/trans/TestBinarizeLabelUDTF.java index a8d2beb..67e0a3e 100644 --- a/core/src/test/java/hivemall/ftvec/trans/TestBinarizeLabelUDTF.java +++ b/core/src/test/java/hivemall/ftvec/trans/TestBinarizeLabelUDTF.java @@ -24,6 +24,8 @@ import static org.mockito.Mockito.times; import static org.powermock.api.mockito.PowerMockito.doNothing; import static org.powermock.api.mockito.PowerMockito.spy; import static org.powermock.api.mockito.PowerMockito.verifyPrivate; + +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.util.Arrays; @@ -104,4 +106,17 @@ public class TestBinarizeLabelUDTF { verifyPrivate(udtf, times(0)).invoke("forward", any(Object[].class)); } + + @Test + 
public void testSerialization() throws HiveException { + final List<String> featureNames = Arrays.asList("positive", "negative", "features"); + TestUtils.testGenericUDTFSerialization( + BinarizeLabelUDTF.class, + new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + ObjectInspectorFactory.getStandardConstantListObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, featureNames)}, + new Object[][] {{new Integer(0), new Integer(0), WritableUtils.val("a:1", "b:2")}}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/ftvec/trans/VectorizeFeaturesUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/ftvec/trans/VectorizeFeaturesUDFTest.java b/core/src/test/java/hivemall/ftvec/trans/VectorizeFeaturesUDFTest.java index e826963..cf28350 100644 --- a/core/src/test/java/hivemall/ftvec/trans/VectorizeFeaturesUDFTest.java +++ b/core/src/test/java/hivemall/ftvec/trans/VectorizeFeaturesUDFTest.java @@ -18,6 +18,7 @@ */ package hivemall.ftvec.trans; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.io.IOException; @@ -88,7 +89,7 @@ public class VectorizeFeaturesUDFTest { arguments[2] = new DeferredJavaObject("1.1"); List<Text> actuals = udf.evaluate(arguments); - //System.out.println(actuals); + //System.out.println(actuals); List<Text> expected = WritableUtils.val("a:0.1", "b:1.1"); Assert.assertEquals(expected, actuals); @@ -111,7 +112,7 @@ public class VectorizeFeaturesUDFTest { arguments[2] = new DeferredJavaObject("0"); List<Text> actuals = udf.evaluate(arguments); - //System.out.println(actuals); + //System.out.println(actuals); List<Text> expected = WritableUtils.val(new String[] {"a:0.1"}); Assert.assertEquals(expected, actuals); @@ -134,7 +135,7 @@ public class VectorizeFeaturesUDFTest { arguments[2] = 
new DeferredJavaObject(new Boolean(false)); List<Text> actuals = udf.evaluate(arguments); - //System.out.println(actuals); + //System.out.println(actuals); List<Text> expected = WritableUtils.val(new String[] {"a:0.1"}); Assert.assertEquals(expected, actuals); @@ -163,19 +164,19 @@ public class VectorizeFeaturesUDFTest { arguments[2] = new DeferredJavaObject("dayofweek"); List<Text> actuals = udf.evaluate(arguments); - //System.out.println(actuals); + //System.out.println(actuals); List<Text> expected = WritableUtils.val("a:0.1", "b#dayofweek"); Assert.assertEquals(expected, actuals); arguments[2] = new DeferredJavaObject("1.0"); actuals = udf.evaluate(arguments); - //System.out.println(actuals); + //System.out.println(actuals); expected = WritableUtils.val("a:0.1", "b:1.0"); Assert.assertEquals(expected, actuals); arguments[2] = new DeferredJavaObject("1"); actuals = udf.evaluate(arguments); - //System.out.println(actuals); + //System.out.println(actuals); expected = WritableUtils.val("a:0.1", "b:1.0"); Assert.assertEquals(expected, actuals); @@ -188,4 +189,18 @@ public class VectorizeFeaturesUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + final List<String> featureNames = Arrays.asList("q", "c"); + + TestUtils.testGenericUDFSerialization( + VectorizeFeaturesUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardConstantListObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, featureNames), + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaStringObjectInspector}, new Object[] { + featureNames, 0.1d, "dayofweek"}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/geospatial/HaversineDistanceUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/geospatial/HaversineDistanceUDFTest.java 
b/core/src/test/java/hivemall/geospatial/HaversineDistanceUDFTest.java index af5316a..5ec709b 100644 --- a/core/src/test/java/hivemall/geospatial/HaversineDistanceUDFTest.java +++ b/core/src/test/java/hivemall/geospatial/HaversineDistanceUDFTest.java @@ -20,6 +20,7 @@ package hivemall.geospatial; import java.io.IOException; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; @@ -102,4 +103,23 @@ public class HaversineDistanceUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + // Tokyo + double lat1 = 35.6833d, lon1 = 139.7667d; + // Osaka + double lat2 = 34.6603d, lon2 = 135.5232d; + + TestUtils.testGenericUDFSerialization( + HaversineDistanceUDF.class, + new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, true)}, + new Object[] {lat1, lon1, lat2, lon2, true}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/geospatial/Lat2TileYUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/geospatial/Lat2TileYUDFTest.java b/core/src/test/java/hivemall/geospatial/Lat2TileYUDFTest.java index 8fa1ada..e4006db 100644 --- a/core/src/test/java/hivemall/geospatial/Lat2TileYUDFTest.java +++ b/core/src/test/java/hivemall/geospatial/Lat2TileYUDFTest.java @@ -20,6 +20,7 @@ package hivemall.geospatial; import java.io.IOException; +import hivemall.TestUtils; import 
org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; @@ -45,4 +46,12 @@ public class Lat2TileYUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization(Lat2TileYUDF.class, new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, new Object[] {49.60055d, + 13}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/geospatial/Lon2TileXUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/geospatial/Lon2TileXUDFTest.java b/core/src/test/java/hivemall/geospatial/Lon2TileXUDFTest.java index cd82826..6191321 100644 --- a/core/src/test/java/hivemall/geospatial/Lon2TileXUDFTest.java +++ b/core/src/test/java/hivemall/geospatial/Lon2TileXUDFTest.java @@ -20,6 +20,7 @@ package hivemall.geospatial; import java.io.IOException; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; @@ -45,4 +46,12 @@ public class Lon2TileXUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization(Lon2TileXUDF.class, new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, new Object[] {11.01296d, + 13}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/geospatial/TileX2LonUDFTest.java ---------------------------------------------------------------------- diff 
--git a/core/src/test/java/hivemall/geospatial/TileX2LonUDFTest.java b/core/src/test/java/hivemall/geospatial/TileX2LonUDFTest.java index ce2b0c9..1bbe4a4 100644 --- a/core/src/test/java/hivemall/geospatial/TileX2LonUDFTest.java +++ b/core/src/test/java/hivemall/geospatial/TileX2LonUDFTest.java @@ -20,6 +20,7 @@ package hivemall.geospatial; import java.io.IOException; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; @@ -45,4 +46,11 @@ public class TileX2LonUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization(TileX2LonUDF.class, new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, new Object[] {3551, 13}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/geospatial/TileY2LatUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/geospatial/TileY2LatUDFTest.java b/core/src/test/java/hivemall/geospatial/TileY2LatUDFTest.java index 5f4b516..688b509 100644 --- a/core/src/test/java/hivemall/geospatial/TileY2LatUDFTest.java +++ b/core/src/test/java/hivemall/geospatial/TileY2LatUDFTest.java @@ -20,6 +20,7 @@ package hivemall.geospatial; import java.io.IOException; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; @@ -45,4 +46,11 @@ public class TileY2LatUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization(TileY2LatUDF.class, 
new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, new Object[] {503, 14}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/knn/distance/EuclidDistanceUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/knn/distance/EuclidDistanceUDFTest.java b/core/src/test/java/hivemall/knn/distance/EuclidDistanceUDFTest.java index e91ec6d..64bfe1b 100644 --- a/core/src/test/java/hivemall/knn/distance/EuclidDistanceUDFTest.java +++ b/core/src/test/java/hivemall/knn/distance/EuclidDistanceUDFTest.java @@ -18,9 +18,15 @@ */ package hivemall.knn.distance; +import java.io.IOException; import java.util.Arrays; import java.util.List; +import hivemall.TestUtils; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.junit.Assert; import org.junit.Test; @@ -42,4 +48,14 @@ public class EuclidDistanceUDFTest { Assert.assertEquals(Math.sqrt(1.0 + 9.0 + 9.0), d, 0.f); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + EuclidDistanceUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector)}, + new Object[] {Arrays.asList("1:1.0", "2:3.0", "3:3.0"), Arrays.asList("1:2.0", "3:6.0")}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/knn/similarity/CosineSimilarityUDFTest.java 
---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/knn/similarity/CosineSimilarityUDFTest.java b/core/src/test/java/hivemall/knn/similarity/CosineSimilarityUDFTest.java index 9825e9b..9f319a9 100644 --- a/core/src/test/java/hivemall/knn/similarity/CosineSimilarityUDFTest.java +++ b/core/src/test/java/hivemall/knn/similarity/CosineSimilarityUDFTest.java @@ -22,6 +22,11 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; +import hivemall.TestUtils; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.junit.Assert; import org.junit.Test; @@ -107,4 +112,14 @@ public class CosineSimilarityUDFTest { CosineSimilarityUDF.cosineSimilarity(Arrays.asList("1", "2"), Arrays.asList("1", "2")), 0.0); } + + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + CosineSimilarityUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector)}, + new Object[] {Arrays.asList("1:1.0", "2:3.0", "3:3.0"), Arrays.asList("1:2.0", "3:6.0")}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java b/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java index 898b4cb..766dade 100644 --- 
a/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java +++ b/core/src/test/java/hivemall/knn/similarity/DIMSUMMapperUDTFTest.java @@ -18,6 +18,7 @@ */ package hivemall.knn.similarity; +import hivemall.TestUtils; import hivemall.mf.BPRMatrixFactorizationUDTFTest; import hivemall.utils.hadoop.HiveUtils; import hivemall.utils.lang.StringUtils; @@ -357,6 +358,29 @@ public class DIMSUMMapperUDTFTest { emitCounter.getValue() < numMaxEmits); } + @Test + public void testSerialization() throws HiveException { + final Integer[] itemIDs = new Integer[] {1, 2, 3}; + + final List<String> user = new ArrayList<String>(); + convertRowToFeatures(0, user, itemIDs); + + final Map<Integer, Double> norms = new HashMap<Integer, Double>(); + computeColumnNorms(norms, itemIDs); + + TestUtils.testGenericUDTFSerialization( + DIMSUMMapperUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + ObjectInspectorFactory.getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector), + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + "-threshold 0.999999 -disable_symmetric_output")}, new Object[][] {{user, + norms}}); + } + @Nonnull private static BufferedReader readFile(@Nonnull String fileName) throws IOException { // use MF's resource file http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/mf/BPRMatrixFactorizationUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/mf/BPRMatrixFactorizationUDTFTest.java b/core/src/test/java/hivemall/mf/BPRMatrixFactorizationUDTFTest.java index 41f1f97..d8dd820 100644 --- a/core/src/test/java/hivemall/mf/BPRMatrixFactorizationUDTFTest.java +++ 
b/core/src/test/java/hivemall/mf/BPRMatrixFactorizationUDTFTest.java @@ -18,6 +18,7 @@ */ package hivemall.mf; +import hivemall.TestUtils; import hivemall.utils.lang.StringUtils; import java.io.BufferedReader; @@ -111,6 +112,19 @@ public class BPRMatrixFactorizationUDTFTest { Assert.assertTrue("finishedIter: " + finishedIter, finishedIter < iterations); } + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + BPRMatrixFactorizationUDTF.class, + new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + "-factor 10 -iter 1")}, new Object[][] {{0, 0, 1}}); + } + @Nonnull private static BufferedReader readFile(@Nonnull String fileName) throws IOException { InputStream is = BPRMatrixFactorizationUDTFTest.class.getResourceAsStream(fileName); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/mf/MatrixFactorizationAdaGradUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/mf/MatrixFactorizationAdaGradUDTFTest.java b/core/src/test/java/hivemall/mf/MatrixFactorizationAdaGradUDTFTest.java index e587f01..64f54fa 100644 --- a/core/src/test/java/hivemall/mf/MatrixFactorizationAdaGradUDTFTest.java +++ b/core/src/test/java/hivemall/mf/MatrixFactorizationAdaGradUDTFTest.java @@ -18,6 +18,7 @@ */ package hivemall.mf; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -80,4 +81,17 @@ public class MatrixFactorizationAdaGradUDTFTest { } } + @Test + public void testSerialization() 
throws HiveException { + TestUtils.testGenericUDTFSerialization( + MatrixFactorizationAdaGradUDTF.class, + new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaFloatObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-factor 10")}, + new Object[][] {{0, 0, 5.f}}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/mf/MatrixFactorizationSGDUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/mf/MatrixFactorizationSGDUDTFTest.java b/core/src/test/java/hivemall/mf/MatrixFactorizationSGDUDTFTest.java index 53983c1..1f35da1 100644 --- a/core/src/test/java/hivemall/mf/MatrixFactorizationSGDUDTFTest.java +++ b/core/src/test/java/hivemall/mf/MatrixFactorizationSGDUDTFTest.java @@ -18,6 +18,7 @@ */ package hivemall.mf; +import hivemall.TestUtils; import hivemall.mf.FactorizedModel.RankInitScheme; import hivemall.utils.lang.mutable.MutableInt; @@ -343,4 +344,17 @@ public class MatrixFactorizationSGDUDTFTest { Assert.assertEquals(5, numCollected.intValue()); Assert.assertFalse(tmpFile.exists()); } + + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + MatrixFactorizationSGDUDTF.class, + new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaFloatObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-factor 10")}, + new Object[][] {{0, 0, 5.f}}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/recommend/SlimUDTFTest.java 
---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/recommend/SlimUDTFTest.java b/core/src/test/java/hivemall/recommend/SlimUDTFTest.java index 00b78f0..0bb57d3 100644 --- a/core/src/test/java/hivemall/recommend/SlimUDTFTest.java +++ b/core/src/test/java/hivemall/recommend/SlimUDTFTest.java @@ -21,6 +21,7 @@ package hivemall.recommend; import java.util.HashMap; import java.util.Map; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -96,4 +97,75 @@ public class SlimUDTFTest { slim.finalizeTraining(); } + @Test + public void testSerialization() throws HiveException { + int numUser = 4; + int numItem = 5; + + float[][] data = { {1.f, 4.f, 0.f, 0.f, 0.f}, {0.f, 3.f, 0.f, 1.f, 2.f}, + {2.f, 2.f, 0.f, 0.f, 3.f}, {0.f, 1.f, 1.f, 0.f, 0.f}}; + + Object[][] rows = new Object[numItem * (numItem - 1)][5]; + int ri = 0; + + for (int i = 0; i < numItem; i++) { + Map<Integer, Float> Ri = new HashMap<>(); + for (int u = 0; u < numUser; u++) { + if (data[u][i] != 0.) { + Ri.put(u, data[u][i]); + } + } + + // most similar data + Map<Integer, Map<Integer, Float>> knnRatesOfI = new HashMap<>(); + for (int u = 0; u < numUser; u++) { + Map<Integer, Float> Ru = new HashMap<>(); + for (int k = 0; k < numItem; k++) { + if (k == i) + continue; + Ru.put(k, data[u][k]); + } + knnRatesOfI.put(u, Ru); + } + + for (int j = 0; j < numItem; j++) { + if (i == j) + continue; + Map<Integer, Float> Rj = new HashMap<>(); + for (int u = 0; u < numUser; u++) { + if (data[u][j] != 0.) 
{ + Rj.put(u, data[u][j]); + } + } + + rows[ri][0] = i; + rows[ri][1] = Ri; + rows[ri][2] = knnRatesOfI; + rows[ri][3] = j; + rows[ri][4] = Rj; + ri += 1; + } + } + + TestUtils.testGenericUDTFSerialization( + SlimUDTF.class, + new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + ObjectInspectorFactory.getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaFloatObjectInspector), + ObjectInspectorFactory.getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + ObjectInspectorFactory.getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaFloatObjectInspector)), + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + ObjectInspectorFactory.getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaFloatObjectInspector), + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + "-l2 0.01 -l1 0.01")}, rows); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/regression/AdaGradUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/regression/AdaGradUDTFTest.java b/core/src/test/java/hivemall/regression/AdaGradUDTFTest.java index fa7e28a..f80c579 100644 --- a/core/src/test/java/hivemall/regression/AdaGradUDTFTest.java +++ b/core/src/test/java/hivemall/regression/AdaGradUDTFTest.java @@ -20,7 +20,9 @@ package hivemall.regression; import static org.junit.Assert.assertEquals; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -28,6 +30,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.junit.Test; +import java.util.Arrays; + public class AdaGradUDTFTest { @SuppressWarnings("deprecation") @@ -57,4 +61,14 @@ public class AdaGradUDTFTest { labelOI}); assertEquals("struct<feature:bigint,weight:float>", longListSOI.getTypeName()); } + + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + AdaGradUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaFloatObjectInspector}, new Object[][] {{ + Arrays.asList("1:-2", "2:-1"), 1.f}}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/regression/GeneralRegressorUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/regression/GeneralRegressorUDTFTest.java b/core/src/test/java/hivemall/regression/GeneralRegressorUDTFTest.java index efc0699..da33f5d 100644 --- a/core/src/test/java/hivemall/regression/GeneralRegressorUDTFTest.java +++ b/core/src/test/java/hivemall/regression/GeneralRegressorUDTFTest.java @@ -28,6 +28,7 @@ import java.util.List; import javax.annotation.Nonnull; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.Collector; @@ -165,7 +166,7 @@ public class GeneralRegressorUDTFTest { } @Test(expected = IllegalArgumentException.class) - public void testIlleagalStringFeature() throws Exception { + public void 
testIllegalStringFeature() throws Exception { List<String> x = Arrays.asList("1:-2jjjj", "2:-1"); ObjectInspector featureOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector; testFeature(x, featureOI, String.class, String.class); @@ -323,6 +324,19 @@ public class GeneralRegressorUDTFTest { } } + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + GeneralRegressorUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaFloatObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + "-loss SquaredLoss")}, + new Object[][] {{Arrays.asList("1:-2", "2:-1"), 10.f}}); + } + private static void println(String msg) { if (DEBUG) { System.out.println(msg); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/smile/classification/RandomForestClassifierUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/smile/classification/RandomForestClassifierUDTFTest.java b/core/src/test/java/hivemall/smile/classification/RandomForestClassifierUDTFTest.java index 578689c..903822b 100644 --- a/core/src/test/java/hivemall/smile/classification/RandomForestClassifierUDTFTest.java +++ b/core/src/test/java/hivemall/smile/classification/RandomForestClassifierUDTFTest.java @@ -18,6 +18,7 @@ */ package hivemall.smile.classification; +import hivemall.TestUtils; import hivemall.classifier.KernelExpansionPassiveAggressiveUDTF; import hivemall.utils.codec.Base91; import hivemall.utils.lang.mutable.MutableInt; @@ -360,6 +361,40 @@ public class RandomForestClassifierUDTFTest { Assert.assertTrue("oob error rate is too high: " + oobErrorRate, oobErrorRate < 0.3); } + @Test + public void testSerialization() throws 
HiveException, IOException, ParseException { + URL url = new URL( + "https://gist.githubusercontent.com/myui/143fa9d05bd6e7db0114/raw/500f178316b802f1cade6e3bf8dc814a96e84b1e/iris.arff"); + InputStream is = new BufferedInputStream(url.openStream()); + + ArffParser arffParser = new ArffParser(); + arffParser.setResponseIndex(4); + + AttributeDataset iris = arffParser.parse(is); + int size = iris.size(); + double[][] x = iris.toArray(new double[size][]); + int[] y = iris.toArray(new int[size]); + + final Object[][] rows = new Object[size][2]; + for (int i = 0; i < size; i++) { + double[] row = x[i]; + final List<String> xi = new ArrayList<String>(x[0].length); + for (int j = 0; j < row.length; j++) { + xi.add(j + ":" + row[j]); + } + rows[i][0] = xi; + rows[i][1] = y[i]; + } + + TestUtils.testGenericUDTFSerialization( + RandomForestClassifierUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-trees 49")}, + rows); + } @Nonnull private static BufferedReader readFile(@Nonnull String fileName) throws IOException { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/smile/tools/TreePredictUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/smile/tools/TreePredictUDFTest.java b/core/src/test/java/hivemall/smile/tools/TreePredictUDFTest.java index 31713d9..1c21e0d 100644 --- a/core/src/test/java/hivemall/smile/tools/TreePredictUDFTest.java +++ b/core/src/test/java/hivemall/smile/tools/TreePredictUDFTest.java @@ -18,6 +18,7 @@ */ package hivemall.smile.tools; +import hivemall.TestUtils; import hivemall.math.matrix.dense.RowMajorDenseMatrix2d; import 
hivemall.smile.classification.DecisionTree; import hivemall.smile.data.Attribute; @@ -206,6 +207,55 @@ public class TreePredictUDFTest { return result.get(); } + @Test + public void testSerialization() throws HiveException, IOException, ParseException { + URL url = new URL( + "https://gist.githubusercontent.com/myui/ef17aabecf0c0c5bcb69/raw/aac0575b4d43072c6f3c82d9072fdefb61892694/cpu.arff"); + InputStream is = new BufferedInputStream(url.openStream()); + + ArffParser arffParser = new ArffParser(); + arffParser.setResponseIndex(6); + AttributeDataset data = arffParser.parse(is); + double[] datay = data.toArray(new double[data.size()]); + double[][] datax = data.toArray(new double[data.size()][]); + + int n = datax.length; + int m = 3 * n / 4; + int[] index = Math.permutate(n); + + double[][] trainx = new double[m][]; + double[] trainy = new double[m]; + for (int i = 0; i < m; i++) { + trainx[i] = datax[index[i]]; + trainy[i] = datay[index[i]]; + } + + double[][] testx = new double[n - m][]; + double[] testy = new double[n - m]; + for (int i = m; i < n; i++) { + testx[i - m] = datax[index[i]]; + testy[i - m] = datay[index[i]]; + } + + Attribute[] attrs = SmileExtUtils.convertAttributeTypes(data.attributes()); + RegressionTree tree = new RegressionTree(attrs, new RowMajorDenseMatrix2d(trainx, + trainx[0].length), trainy, 20); + + byte[] b = tree.serialize(true); + byte[] encoded = Base91.encode(b); + Text model = new Text(encoded); + + TestUtils.testGenericUDFSerialization( + TreePredictUDF.class, + new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + PrimitiveObjectInspectorFactory.writableStringObjectInspector, + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector), + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, false)}, + new Object[] {"model_id#1", model, ArrayUtils.toList(testx[0])}); + } + private static 
void debugPrint(String msg) { if (DEBUG) { System.out.println(msg); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java b/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java index 4251ca9..779eb73 100644 --- a/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java +++ b/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java @@ -20,6 +20,7 @@ package hivemall.smile.tools; import static org.junit.Assert.assertEquals; +import hivemall.TestUtils; import hivemall.math.matrix.dense.RowMajorDenseMatrix2d; import hivemall.smile.classification.DecisionTree; import hivemall.smile.data.Attribute; @@ -228,6 +229,54 @@ public class TreePredictUDFv1Test { return result.get(); } + @Test + public void testSerialization() throws HiveException, IOException, ParseException { + URL url = new URL( + "https://gist.githubusercontent.com/myui/ef17aabecf0c0c5bcb69/raw/aac0575b4d43072c6f3c82d9072fdefb61892694/cpu.arff"); + InputStream is = new BufferedInputStream(url.openStream()); + + ArffParser arffParser = new ArffParser(); + arffParser.setResponseIndex(6); + AttributeDataset data = arffParser.parse(is); + double[] datay = data.toArray(new double[data.size()]); + double[][] datax = data.toArray(new double[data.size()][]); + + int n = datax.length; + int m = 3 * n / 4; + int[] index = Math.permutate(n); + + double[][] trainx = new double[m][]; + double[] trainy = new double[m]; + for (int i = 0; i < m; i++) { + trainx[i] = datax[index[i]]; + trainy[i] = datay[index[i]]; + } + + double[][] testx = new double[n - m][]; + double[] testy = new double[n - m]; + for (int i = m; i < n; i++) { + testx[i - m] = datax[index[i]]; + testy[i - m] = datay[index[i]]; + } + + Attribute[] attrs = 
SmileExtUtils.convertAttributeTypes(data.attributes()); + RegressionTree tree = new RegressionTree(attrs, new RowMajorDenseMatrix2d(trainx, + trainx[0].length), trainy, 20); + String opScript = tree.predictOpCodegen(StackMachine.SEP); + + TestUtils.testGenericUDFSerialization( + TreePredictUDFv1.class, + new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector), + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, false)}, + new Object[] {"model_id#1", ModelType.opscode.getId(), opScript, + ArrayUtils.toList(testx[0])}); + } + private static void debugPrint(String msg) { if (DEBUG) { System.out.println(msg); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/statistics/MovingAverageUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/statistics/MovingAverageUDTFTest.java b/core/src/test/java/hivemall/statistics/MovingAverageUDTFTest.java index 3ab7e8b..4439bf5 100644 --- a/core/src/test/java/hivemall/statistics/MovingAverageUDTFTest.java +++ b/core/src/test/java/hivemall/statistics/MovingAverageUDTFTest.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.Collector; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -65,4 +66,15 @@ public class MovingAverageUDTFTest { Assert.assertEquals(Arrays.asList(1.d, 1.5d, 2.d, 3.d, 4.d, 5.d, 6.d), results); } + @Test + public void testSerialization() throws HiveException { + 
TestUtils.testGenericUDTFSerialization( + MovingAverageUDTF.class, + new ObjectInspector[] { + PrimitiveObjectInspectorFactory.javaFloatObjectInspector, + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaIntObjectInspector, 3)}, + new Object[][] { {1.f}, {2.f}, {3.f}, {4.f}, {5.f}}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/TryCastUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/TryCastUDFTest.java b/core/src/test/java/hivemall/tools/TryCastUDFTest.java index 7cd75ba..09235b2 100644 --- a/core/src/test/java/hivemall/tools/TryCastUDFTest.java +++ b/core/src/test/java/hivemall/tools/TryCastUDFTest.java @@ -18,15 +18,18 @@ */ package hivemall.tools; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.io.IOException; +import java.util.Arrays; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.Text; @@ -55,4 +58,15 @@ public class TryCastUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + TryCastUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector), + ObjectInspectorUtils.getConstantObjectInspector( + 
PrimitiveObjectInspectorFactory.javaStringObjectInspector, "array<string>")}, + new Object[] {Arrays.asList(1.d, 2.d, 3.d)}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/ArrayAppendUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/array/ArrayAppendUDFTest.java b/core/src/test/java/hivemall/tools/array/ArrayAppendUDFTest.java index 113e993..74ae2d5 100644 --- a/core/src/test/java/hivemall/tools/array/ArrayAppendUDFTest.java +++ b/core/src/test/java/hivemall/tools/array/ArrayAppendUDFTest.java @@ -18,9 +18,11 @@ */ package hivemall.tools.array; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.io.IOException; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -85,7 +87,7 @@ public class ArrayAppendUDFTest { ArrayAppendUDF udf = new ArrayAppendUDF(); udf.initialize(new ObjectInspector[] { - ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector), PrimitiveObjectInspectorFactory.javaDoubleObjectInspector}); DeferredObject[] args = new DeferredObject[] {new GenericUDF.DeferredJavaObject(null), @@ -98,4 +100,14 @@ public class ArrayAppendUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + ArrayAppendUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector), + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector}, new Object[] { + Arrays.asList(0.d, 1.d, 2.d), 3.d}); + } + } 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/ArrayElementAtUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/array/ArrayElementAtUDFTest.java b/core/src/test/java/hivemall/tools/array/ArrayElementAtUDFTest.java index 95ef1a2..39a5cd9 100644 --- a/core/src/test/java/hivemall/tools/array/ArrayElementAtUDFTest.java +++ b/core/src/test/java/hivemall/tools/array/ArrayElementAtUDFTest.java @@ -18,9 +18,11 @@ */ package hivemall.tools.array; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.io.IOException; +import java.util.Arrays; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -78,4 +80,14 @@ public class ArrayElementAtUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + ArrayElementAtUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector), + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, + new Object[] {Arrays.asList(0.d, 1.d, 2.d), 1}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/ArrayFlattenUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/array/ArrayFlattenUDFTest.java b/core/src/test/java/hivemall/tools/array/ArrayFlattenUDFTest.java index 3824fd5..309dfad 100644 --- a/core/src/test/java/hivemall/tools/array/ArrayFlattenUDFTest.java +++ b/core/src/test/java/hivemall/tools/array/ArrayFlattenUDFTest.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; +import hivemall.TestUtils; import 
org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; @@ -51,4 +52,13 @@ public class ArrayFlattenUDFTest { udf.close(); } + + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + ArrayFlattenUDF.class, + new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector))}, + new Object[] {Arrays.asList(Arrays.asList(0, 1, 2, 3), Arrays.asList(4, 5), + Arrays.asList(6, 7))}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/ArraySliceUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/array/ArraySliceUDFTest.java b/core/src/test/java/hivemall/tools/array/ArraySliceUDFTest.java index b18ae54..760e476 100644 --- a/core/src/test/java/hivemall/tools/array/ArraySliceUDFTest.java +++ b/core/src/test/java/hivemall/tools/array/ArraySliceUDFTest.java @@ -23,6 +23,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; @@ -110,4 +111,17 @@ public class ArraySliceUDFTest { udf.close(); } + + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + ArraySliceUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaIntObjectInspector}, + new 
Object[] { + Arrays.asList("zero", "one", "two", "three", "four", "five", "six", "seven", + "eight", "nine", "ten"), 2, 5}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/ArrayUnionUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/array/ArrayUnionUDFTest.java b/core/src/test/java/hivemall/tools/array/ArrayUnionUDFTest.java index 1138e9a..eca4212 100644 --- a/core/src/test/java/hivemall/tools/array/ArrayUnionUDFTest.java +++ b/core/src/test/java/hivemall/tools/array/ArrayUnionUDFTest.java @@ -18,9 +18,11 @@ */ package hivemall.tools.array; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.io.IOException; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -58,4 +60,13 @@ public class ArrayUnionUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + ArrayUnionUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector)}, + new Object[] {Arrays.asList(0.d, 1.d), Arrays.asList(2.d, 3.d)}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/ConditionalEmitUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/array/ConditionalEmitUDTFTest.java b/core/src/test/java/hivemall/tools/array/ConditionalEmitUDTFTest.java index 9564cef..70ad25e 100644 --- a/core/src/test/java/hivemall/tools/array/ConditionalEmitUDTFTest.java +++ b/core/src/test/java/hivemall/tools/array/ConditionalEmitUDTFTest.java @@ -23,6 
+23,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.Collector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -65,4 +66,15 @@ public class ConditionalEmitUDTFTest { udtf.close(); } + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + ConditionalEmitUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector)}, + new Object[][] { + {Arrays.asList(true, false, true), Arrays.asList("one", "two", "three")}, + {Arrays.asList(true, true, false), Arrays.asList("one", "two", "three")}}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/FirstElementUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/array/FirstElementUDFTest.java b/core/src/test/java/hivemall/tools/array/FirstElementUDFTest.java index 73ba47a..241b8b9 100644 --- a/core/src/test/java/hivemall/tools/array/FirstElementUDFTest.java +++ b/core/src/test/java/hivemall/tools/array/FirstElementUDFTest.java @@ -18,9 +18,11 @@ */ package hivemall.tools.array; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.io.IOException; +import java.util.Arrays; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -61,4 +63,12 @@ public class FirstElementUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + FirstElementUDF.class, + new ObjectInspector[] 
{ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector)}, + new Object[] {Arrays.asList(0.d, 1.d, 2.d)}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/LastElementUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/array/LastElementUDFTest.java b/core/src/test/java/hivemall/tools/array/LastElementUDFTest.java index 38c7361..325c28a 100644 --- a/core/src/test/java/hivemall/tools/array/LastElementUDFTest.java +++ b/core/src/test/java/hivemall/tools/array/LastElementUDFTest.java @@ -18,9 +18,11 @@ */ package hivemall.tools.array; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.io.IOException; +import java.util.Arrays; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -61,4 +63,12 @@ public class LastElementUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + LastElementUDF.class, + new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector)}, + new Object[] {Arrays.asList(0.d, 1.d, 2.d)}); + } + }
