Repository: incubator-hivemall Updated Branches: refs/heads/master 36fb839d9 -> b64b94f92
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java b/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java index fc5572d..685139d 100644 --- a/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java +++ b/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java @@ -21,6 +21,7 @@ package hivemall.tools.array; import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; +import java.io.IOException; import java.util.List; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -70,17 +71,22 @@ public class SelectKBestUDFTest { } @Test - public void testSerialization() throws HiveException { - final SelectKBestUDF selectKBest = new SelectKBestUDF(); + public void testSerialization() throws HiveException, IOException { final int k = 2; - selectKBest.initialize(new ObjectInspector[] { - ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector), - ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector), - ObjectInspectorUtils.getConstantObjectInspector( - PrimitiveObjectInspectorFactory.javaIntObjectInspector, k)}); + final double[] data = new double[] {250.29999999999998, 170.90000000000003, 73.2, + 12.199999999999996}; + final double[] importanceList = new double[] {292.1666753739119, 152.70000455081467, + 187.93333893418327, 59.93333511948589}; - byte[] serialized = TestUtils.serializeObjectByKryo(selectKBest); - TestUtils.deserializeObjectByKryo(serialized, SelectKBestUDF.class); + TestUtils.testGenericUDFSerialization( + SelectKBestUDF.class, + new ObjectInspector[] { + 
ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector), + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaIntObjectInspector, k)}, + new Object[] {WritableUtils.toWritableList(data), + WritableUtils.toWritableList(importanceList), k}); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java b/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java index 6f7d4cf..a5b3550 100644 --- a/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java +++ b/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java @@ -18,11 +18,14 @@ */ package hivemall.tools.json; +import hivemall.TestUtils; import hivemall.utils.hadoop.HiveUtils; +import java.io.IOException; import java.util.Arrays; import java.util.List; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -79,4 +82,12 @@ public class FromJsonUDFTest { udf.close(); } + + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization(FromJsonUDF.class, + new ObjectInspector[] {PrimitiveObjectInspectorFactory.javaStringObjectInspector, + HiveUtils.getConstStringObjectInspector("array<double>")}, + new Object[] {"[0.1,1.1,2.2]"}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java 
---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java b/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java index 4da6b9a..5223849 100644 --- a/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java +++ b/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java @@ -18,8 +18,10 @@ */ package hivemall.tools.json; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -29,6 +31,9 @@ import org.apache.hadoop.io.Text; import org.junit.Assert; import org.junit.Test; +import java.io.IOException; +import java.util.Arrays; + public class ToJsonUDFTest { @Test @@ -47,4 +52,12 @@ public class ToJsonUDFTest { udf.close(); } + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + ToJsonUDF.class, + new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector)}, + new Object[] {Arrays.asList(0.1d, 1.1d, 2.1d)}); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java b/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java index 9012c63..23018f8 100644 --- a/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java +++ b/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java @@ -18,6 +18,7 @@ */ package hivemall.tools.vector; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.io.IOException; 
@@ -77,4 +78,14 @@ public class VectorAddUDFTest { udf.close(); } + + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + VectorAddUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaFloatObjectInspector)}, + new Object[] {Arrays.asList(1.d, 2.d, 3.d), Arrays.asList(2.f, 3.f, 4.f)}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java b/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java index 6d7c05e..f0fe9d1 100644 --- a/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java +++ b/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java @@ -18,6 +18,7 @@ */ package hivemall.tools.vector; +import hivemall.TestUtils; import hivemall.utils.hadoop.WritableUtils; import java.io.IOException; @@ -76,4 +77,14 @@ public class VectorDotUDFTest { udf.close(); } + + @Test + public void testSerialization() throws HiveException, IOException { + TestUtils.testGenericUDFSerialization( + VectorDotUDF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaFloatObjectInspector)}, + new Object[] {Arrays.asList(1.d, 2.d, 3.d), Arrays.asList(2.f, 3.f, 4.f)}); + } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java ---------------------------------------------------------------------- diff --git 
a/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java b/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java index 4cbb668..fc725b9 100644 --- a/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java +++ b/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.SortedMap; import java.util.Arrays; +import hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -161,6 +162,21 @@ public class LDAUDTFTest { udtf.getWordScore("アボカド", k2) > udtf.getWordScore("健康", k2)); } + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + LDAUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + "-topics 2 -num_docs 2 -s 1 -iter 32 -eps 1e-3")}, + new Object[][] { + {Arrays.asList("fruits:1", "healthy:1", "vegetables:1")}, + {Arrays.asList("apples:1", "avocados:1", "colds:1", "flu:1", "like:2", + "oranges:1")}}); + } + private static void println(String msg) { if (DEBUG) { System.out.println(msg); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java b/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java index e5045a5..87b2f4c 100644 --- a/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java +++ b/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.SortedMap; import java.util.Arrays; +import
hivemall.TestUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -162,6 +163,21 @@ public class PLSAUDTFTest { udtf.getWordScore("アボカド", k2) > udtf.getWordScore("健康", k2)); } + @Test + public void testSerialization() throws HiveException { + TestUtils.testGenericUDTFSerialization( + PLSAUDTF.class, + new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), + ObjectInspectorUtils.getConstantObjectInspector( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + "-topics 2 -alpha 0.1 -delta 0.00001 -iter 10000")}, + new Object[][] { + {Arrays.asList("fruits:1", "healthy:1", "vegetables:1")}, + {Arrays.asList("apples:1", "avocados:1", "colds:1", "flu:1", "like:2", + "oranges:1")}}); + } + private static void println(String msg) { if (DEBUG) { System.out.println(msg); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java ---------------------------------------------------------------------- diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java index 96d9e4b..745ec30 100644 --- a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java +++ b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java @@ -72,13 +72,10 @@ public final class KuromojiUDF extends GenericUDF { private static final int READ_TIMEOUT_MS = 60000; // 60 sec private static final long MAX_INPUT_STREAM_SIZE = 32L * 1024L * 1024L; // ~32MB - private Mode _mode; - - // lazy instantiation to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.NullPointerException - private transient CharArraySet _stopWords; - - private Set<String> _stopTags; - private UserDictionary _userDict; +
private String _modeString; + private String[] _stopWordsArray; + private String[] _stopTagsArray; + private Object _userDictObj; // workaround to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.util.ConcurrentModificationException private transient JapaneseAnalyzer _analyzer; @@ -91,12 +88,29 @@ public final class KuromojiUDF extends GenericUDF { + arglen); } - this._mode = (arglen >= 2) ? tokenizationMode(arguments[1]) : Mode.NORMAL; - this._stopWords = (arglen >= 3) ? stopWords(arguments[2]) - : JapaneseAnalyzer.getDefaultStopSet(); - this._stopTags = (arglen >= 4) ? stopTags(arguments[3]) - : JapaneseAnalyzer.getDefaultStopTags(); - this._userDict = (arglen >= 5) ? userDictionary(arguments[4]) : null; + this._modeString = (arglen >= 2) ? HiveUtils.getConstString(arguments[1]) : "NORMAL"; + + this._stopWordsArray = null; + if (arglen >= 3 && !HiveUtils.isVoidOI(arguments[2])) { + this._stopWordsArray = HiveUtils.getConstStringArray(arguments[2]); + } + + this._stopTagsArray = null; + if (arglen >= 4 && !HiveUtils.isVoidOI(arguments[3])) { + this._stopTagsArray = HiveUtils.getConstStringArray(arguments[3]); + } + + this._userDictObj = null; + if (arglen >= 5) { + if (HiveUtils.isConstListOI(arguments[4])) { + this._userDictObj = HiveUtils.getConstStringArray(arguments[4]); + } else if (HiveUtils.isConstString(arguments[4])) { + this._userDictObj = HiveUtils.getConstString(arguments[4]); + } else { + throw new UDFArgumentException( + "User dictionary MUST be given as an array of constant string or constant string (URL)"); + } + } this._analyzer = null; @@ -106,7 +120,18 @@ public final class KuromojiUDF extends GenericUDF { @Override public List<Text> evaluate(DeferredObject[] arguments) throws HiveException { if (_analyzer == null) { - this._analyzer = new JapaneseAnalyzer(_userDict, _mode, _stopWords, _stopTags); + Mode mode = tokenizationMode(_modeString); + CharArraySet stopWords = stopWords(_stopWordsArray); + Set<String> stopTags = 
stopTags(_stopTagsArray); + + UserDictionary userDict = null; + if (_userDictObj instanceof String[]) { + userDict = userDictionary((String[]) _userDictObj); + } else if (_userDictObj instanceof String) { + userDict = userDictionary((String) _userDictObj); + } + + this._analyzer = new JapaneseAnalyzer(userDict, mode, stopWords, stopTags); } Object arg0 = arguments[0].get(); @@ -137,9 +162,7 @@ public final class KuromojiUDF extends GenericUDF { } @Nonnull - private static Mode tokenizationMode(@Nonnull final ObjectInspector oi) - throws UDFArgumentException { - final String arg = HiveUtils.getConstString(oi); + private static Mode tokenizationMode(@Nullable final String arg) throws UDFArgumentException { if (arg == null) { return Mode.NORMAL; } @@ -160,12 +183,8 @@ public final class KuromojiUDF extends GenericUDF { } @Nonnull - private static CharArraySet stopWords(@Nonnull final ObjectInspector oi) + private static CharArraySet stopWords(@Nullable final String[] array) throws UDFArgumentException { - if (HiveUtils.isVoidOI(oi)) { - return JapaneseAnalyzer.getDefaultStopSet(); - } - final String[] array = HiveUtils.getConstStringArray(oi); if (array == null) { return JapaneseAnalyzer.getDefaultStopSet(); } @@ -177,12 +196,7 @@ public final class KuromojiUDF extends GenericUDF { } @Nonnull - private static Set<String> stopTags(@Nonnull final ObjectInspector oi) - throws UDFArgumentException { - if (HiveUtils.isVoidOI(oi)) { - return JapaneseAnalyzer.getDefaultStopTags(); - } - final String[] array = HiveUtils.getConstStringArray(oi); + private static Set<String> stopTags(@Nullable final String[] array) throws UDFArgumentException { if (array == null) { return JapaneseAnalyzer.getDefaultStopTags(); } @@ -201,19 +215,6 @@ public final class KuromojiUDF extends GenericUDF { } @Nullable - private static UserDictionary userDictionary(@Nonnull final ObjectInspector oi) - throws UDFArgumentException { - if (HiveUtils.isConstListOI(oi)) { - return 
userDictionary(HiveUtils.getConstStringArray(oi)); - } else if (HiveUtils.isConstString(oi)) { - return userDictionary(HiveUtils.getConstString(oi)); - } else { - throw new UDFArgumentException( - "User dictionary MUST be given as an array of constant string or constant string (URL)"); - } - } - - @Nullable private static UserDictionary userDictionary(@Nullable final String[] userDictArray) throws UDFArgumentException { if (userDictArray == null) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java ---------------------------------------------------------------------- diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java index afaa485..d185b0d 100644 --- a/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java +++ b/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java @@ -27,6 +27,7 @@ import java.util.Arrays; import java.util.List; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -102,7 +103,7 @@ public final class SmartcnUDF extends GenericUDF { } @Nonnull - private static CharArraySet stopWords(@Nonnull final String[] array) + private static CharArraySet stopWords(@Nullable final String[] array) throws UDFArgumentException { if (array == null) { return SmartChineseAnalyzer.getDefaultStopSet(); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/test/java/hivemall/TestUtils.java ---------------------------------------------------------------------- diff --git a/nlp/src/test/java/hivemall/TestUtils.java b/nlp/src/test/java/hivemall/TestUtils.java new file mode 100644 index 0000000..c886c2d --- /dev/null +++ b/nlp/src/test/java/hivemall/TestUtils.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall; + +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hive.com.esotericsoftware.kryo.Kryo; +import org.apache.hive.com.esotericsoftware.kryo.io.Input; +import org.apache.hive.com.esotericsoftware.kryo.io.Output; + +import javax.annotation.Nonnull; +import java.io.ByteArrayOutputStream; + +public final class TestUtils { + + @Nonnull + public static byte[] serializeObjectByKryo(@Nonnull Object obj) { + Kryo kryo = getKryo(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + Output output = new Output(bos); + kryo.writeObject(output, obj); + output.close(); + return bos.toByteArray(); + } + + @Nonnull + public static <T> T deserializeObjectByKryo(@Nonnull byte[] in, @Nonnull Class<T> clazz) { + Kryo kryo = getKryo(); + Input inp = new Input(in); + T t = kryo.readObject(inp, clazz); + inp.close(); + return t; + } + + @Nonnull + private static Kryo getKryo() { + return Utilities.runtimeSerializationKryo.get(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java ---------------------------------------------------------------------- diff --git a/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java 
b/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java index f9acc82..1c3db9f 100644 --- a/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java +++ b/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import hivemall.TestUtils; + import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -33,10 +35,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.io.Text; import org.junit.Assert; import org.junit.Test; -import org.objenesis.strategy.StdInstantiatorStrategy; - -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Output; public class KuromojiUDFTest { @@ -80,7 +78,7 @@ public class KuromojiUDFTest { } @Test(expected = UDFArgumentException.class) - public void testInvalidMode() throws UDFArgumentException, IOException { + public void testInvalidMode() throws IOException, HiveException { GenericUDF udf = new KuromojiUDF(); ObjectInspector[] argOIs = new ObjectInspector[2]; // line @@ -91,6 +89,18 @@ public class KuromojiUDFTest { argOIs[1] = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( stringType, new Text("unsupported mode")); udf.initialize(argOIs); + + DeferredObject[] args = new DeferredObject[1]; + args[0] = new DeferredObject() { + public Text get() throws HiveException { + return new Text("クロモジのJapaneseAnalyzerを使ってみる。テスト。"); + } + + @Override + public void prepare(int arg) throws HiveException {} + }; + udf.evaluate(args); + udf.close(); } @@ -365,16 +375,31 @@ public class KuromojiUDFTest { } @Test - public void testSerializeByKryo() throws UDFArgumentException { + public void testSerialization() throws IOException, HiveException { final KuromojiUDF udf = new KuromojiUDF(); ObjectInspector[] argOIs = new
ObjectInspector[1]; argOIs[0] = PrimitiveObjectInspectorFactory.writableStringObjectInspector; udf.initialize(argOIs); - Kryo kryo = new Kryo(); - kryo.setInstantiatorStrategy(new StdInstantiatorStrategy()); - Output output = new Output(1024 * 16); - kryo.writeObject(output, udf); - output.close(); + // serialization after initialization + byte[] serialized = TestUtils.serializeObjectByKryo(udf); + TestUtils.deserializeObjectByKryo(serialized, KuromojiUDF.class); + + DeferredObject[] args = new DeferredObject[1]; + args[0] = new DeferredObject() { + public Text get() throws HiveException { + return new Text("クロモジのJapaneseAnalyzerを使ってみる。テスト。"); + } + + @Override + public void prepare(int arg) throws HiveException {} + }; + List<Text> tokens = udf.evaluate(args); + + // serialization after evaluation + serialized = TestUtils.serializeObjectByKryo(udf); + TestUtils.deserializeObjectByKryo(serialized, KuromojiUDF.class); + + udf.close(); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java ---------------------------------------------------------------------- diff --git a/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java b/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java index 83ccb0c..342e48a 100644 --- a/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java +++ b/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java @@ -21,6 +21,8 @@ package hivemall.nlp.tokenizer; import java.io.IOException; import java.util.List; +import hivemall.TestUtils; + import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -78,4 +80,33 @@ public class SmartcnUDFTest { Assert.assertNotNull(tokens); udf.close(); } + + @Test + public void testSerialization() throws IOException, HiveException { + final SmartcnUDF udf = new SmartcnUDF(); +
ObjectInspector[] argOIs = new ObjectInspector[1]; + argOIs[0] = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + udf.initialize(argOIs); + + // serialization after initialization + byte[] serialized = TestUtils.serializeObjectByKryo(udf); + TestUtils.deserializeObjectByKryo(serialized, SmartcnUDF.class); + + DeferredObject[] args = new DeferredObject[1]; + args[0] = new DeferredObject() { + public Text get() throws HiveException { + return new Text("Smartcn为Apache2.0协议的开源中文分词系统，Java语言编写，修改的中科院计算所ICTCLAS分词系统。"); + } + + @Override + public void prepare(int arg) throws HiveException {} + }; + List<Text> tokens = udf.evaluate(args); + + // serialization after evaluation + serialized = TestUtils.serializeObjectByKryo(udf); + TestUtils.deserializeObjectByKryo(serialized, SmartcnUDF.class); + + udf.close(); + } }
