Repository: incubator-hivemall
Updated Branches:
  refs/heads/master 36fb839d9 -> b64b94f92


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java b/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java
index fc5572d..685139d 100644
--- a/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java
+++ b/core/src/test/java/hivemall/tools/array/SelectKBestUDFTest.java
@@ -21,6 +21,7 @@ package hivemall.tools.array;
 import hivemall.TestUtils;
 import hivemall.utils.hadoop.WritableUtils;
 
+import java.io.IOException;
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -70,17 +71,22 @@ public class SelectKBestUDFTest {
     }
 
     @Test
-    public void testSerialization() throws HiveException {
-        final SelectKBestUDF selectKBest = new SelectKBestUDF();
+    public void testSerialization() throws HiveException, IOException {
         final int k = 2;
-        selectKBest.initialize(new ObjectInspector[] {
-                ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector),
-                ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector),
-                ObjectInspectorUtils.getConstantObjectInspector(
-                    PrimitiveObjectInspectorFactory.javaIntObjectInspector, k)});
+        final double[] data = new double[] {250.29999999999998, 170.90000000000003, 73.2,
+                12.199999999999996};
+        final double[] importanceList = new double[] {292.1666753739119, 152.70000455081467,
+                187.93333893418327, 59.93333511948589};
 
-        byte[] serialized = TestUtils.serializeObjectByKryo(selectKBest);
-        TestUtils.deserializeObjectByKryo(serialized, SelectKBestUDF.class);
+        TestUtils.testGenericUDFSerialization(
+            SelectKBestUDF.class,
+            new ObjectInspector[] {
+                    ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector),
+                    ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector),
+                    ObjectInspectorUtils.getConstantObjectInspector(
+                        PrimitiveObjectInspectorFactory.javaIntObjectInspector, k)},
+            new Object[] {WritableUtils.toWritableList(data),
+                    WritableUtils.toWritableList(importanceList), k});
     }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java b/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java
index 6f7d4cf..a5b3550 100644
--- a/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java
+++ b/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java
@@ -18,11 +18,14 @@
  */
 package hivemall.tools.json;
 
+import hivemall.TestUtils;
 import hivemall.utils.hadoop.HiveUtils;
 
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -79,4 +82,12 @@ public class FromJsonUDFTest {
 
         udf.close();
     }
+
+    @Test
+    public void testSerialization() throws HiveException, IOException {
+        TestUtils.testGenericUDFSerialization(FromJsonUDF.class,
+            new ObjectInspector[] {PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+                    HiveUtils.getConstStringObjectInspector("array<double>")},
+            new Object[] {"[0.1,1.1,2.2]"});
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java b/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java
index 4da6b9a..5223849 100644
--- a/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java
+++ b/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java
@@ -18,8 +18,10 @@
  */
 package hivemall.tools.json;
 
+import hivemall.TestUtils;
 import hivemall.utils.hadoop.WritableUtils;
 
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -29,6 +31,9 @@ import org.apache.hadoop.io.Text;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.io.IOException;
+import java.util.Arrays;
+
 public class ToJsonUDFTest {
 
     @Test
@@ -47,4 +52,12 @@ public class ToJsonUDFTest {
         udf.close();
     }
 
+    @Test
+    public void testSerialization() throws HiveException, IOException {
+        TestUtils.testGenericUDFSerialization(
+            ToJsonUDF.class,
+            new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector)},
+            new Object[] {Arrays.asList(0.1d, 1.1d, 2.1d)});
+    }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java b/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java
index 9012c63..23018f8 100644
--- a/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java
+++ b/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java
@@ -18,6 +18,7 @@
  */
 package hivemall.tools.vector;
 
+import hivemall.TestUtils;
 import hivemall.utils.hadoop.WritableUtils;
 
 import java.io.IOException;
@@ -77,4 +78,14 @@ public class VectorAddUDFTest {
 
         udf.close();
     }
+
+    @Test
+    public void testSerialization() throws HiveException, IOException {
+        TestUtils.testGenericUDFSerialization(
+            VectorAddUDF.class,
+            new ObjectInspector[] {
+                    ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector),
+                    ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaFloatObjectInspector)},
+            new Object[] {Arrays.asList(1.d, 2.d, 3.d), Arrays.asList(2.f, 3.f, 4.f)});
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java b/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java
index 6d7c05e..f0fe9d1 100644
--- a/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java
+++ b/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java
@@ -18,6 +18,7 @@
  */
 package hivemall.tools.vector;
 
+import hivemall.TestUtils;
 import hivemall.utils.hadoop.WritableUtils;
 
 import java.io.IOException;
@@ -76,4 +77,14 @@ public class VectorDotUDFTest {
 
         udf.close();
     }
+
+    @Test
+    public void testSerialization() throws HiveException, IOException {
+        TestUtils.testGenericUDFSerialization(
+            VectorDotUDF.class,
+            new ObjectInspector[] {
+                    ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector),
+                    ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaFloatObjectInspector)},
+            new Object[] {Arrays.asList(1.d, 2.d, 3.d), Arrays.asList(2.f, 3.f, 4.f)});
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java b/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java
index 4cbb668..fc725b9 100644
--- a/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java
+++ b/core/src/test/java/hivemall/topicmodel/LDAUDTFTest.java
@@ -23,6 +23,7 @@ import java.util.Map;
 import java.util.SortedMap;
 import java.util.Arrays;
 
+import hivemall.TestUtils;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -161,6 +162,21 @@ public class LDAUDTFTest {
             udtf.getWordScore("アボカド", k2) > udtf.getWordScore("健康", k2));
     }
 
+    @Test
+    public void testSerialization() throws HiveException {
+        TestUtils.testGenericUDTFSerialization(
+            LDAUDTF.class,
+            new ObjectInspector[] {
+                    ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector),
+                    ObjectInspectorUtils.getConstantObjectInspector(
+                        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+                        "-topics 2 -num_docs 2 -s 1 -iter 32 -eps 1e-3")},
+            new Object[][] {
+                    {Arrays.asList("fruits:1", "healthy:1", "vegetables:1")},
+                    {Arrays.asList("apples:1", "avocados:1", "colds:1", "flu:1", "like:2",
+                        "oranges:1")}});
+    }
+
     private static void println(String msg) {
         if (DEBUG) {
             System.out.println(msg);

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java b/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java
index e5045a5..87b2f4c 100644
--- a/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java
+++ b/core/src/test/java/hivemall/topicmodel/PLSAUDTFTest.java
@@ -23,6 +23,7 @@ import java.util.Map;
 import java.util.SortedMap;
 import java.util.Arrays;
 
+import hivemall.TestUtils;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -162,6 +163,21 @@ public class PLSAUDTFTest {
             udtf.getWordScore("アボカド", k2) > udtf.getWordScore("健康", k2));
     }
 
+    @Test
+    public void testSerialization() throws HiveException {
+        TestUtils.testGenericUDTFSerialization(
+            PLSAUDTF.class,
+            new ObjectInspector[] {
+                    ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector),
+                    ObjectInspectorUtils.getConstantObjectInspector(
+                        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+                        "-topics 2 -alpha 0.1 -delta 0.00001 -iter 10000")},
+            new Object[][] {
+                    {Arrays.asList("fruits:1", "healthy:1", "vegetables:1")},
+                    {Arrays.asList("apples:1", "avocados:1", "colds:1", "flu:1", "like:2",
+                        "oranges:1")}});
+    }
+
     private static void println(String msg) {
         if (DEBUG) {
             System.out.println(msg);

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
----------------------------------------------------------------------
diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
index 96d9e4b..745ec30 100644
--- a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
+++ b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
@@ -72,13 +72,10 @@ public final class KuromojiUDF extends GenericUDF {
     private static final int READ_TIMEOUT_MS = 60000; // 60 sec
     private static final long MAX_INPUT_STREAM_SIZE = 32L * 1024L * 1024L; // ~32MB
 
-    private Mode _mode;
-
-    // lazy instantiation to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.NullPointerException
-    private transient CharArraySet _stopWords;
-
-    private Set<String> _stopTags;
-    private UserDictionary _userDict;
+    private String _modeString;
+    private String[] _stopWordsArray;
+    private String[] _stopTagsArray;
+    private Object _userDictObj;
 
     // workaround to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.util.ConcurrentModificationException
     private transient JapaneseAnalyzer _analyzer;
@@ -91,12 +88,29 @@ public final class KuromojiUDF extends GenericUDF {
                     + arglen);
         }
 
-        this._mode = (arglen >= 2) ? tokenizationMode(arguments[1]) : Mode.NORMAL;
-        this._stopWords = (arglen >= 3) ? stopWords(arguments[2])
-                : JapaneseAnalyzer.getDefaultStopSet();
-        this._stopTags = (arglen >= 4) ? stopTags(arguments[3])
-                : JapaneseAnalyzer.getDefaultStopTags();
-        this._userDict = (arglen >= 5) ? userDictionary(arguments[4]) : null;
+        this._modeString = (arglen >= 2) ? HiveUtils.getConstString(arguments[1]) : "NORMAL";
+
+        this._stopWordsArray = null;
+        if (arglen >= 3 && !HiveUtils.isVoidOI(arguments[2])) {
+            this._stopWordsArray = HiveUtils.getConstStringArray(arguments[2]);
+        }
+
+        this._stopTagsArray = null;
+        if (arglen >= 4 && !HiveUtils.isVoidOI(arguments[3])) {
+            this._stopTagsArray = HiveUtils.getConstStringArray(arguments[3]);
+        }
+
+        this._userDictObj = null;
+        if (arglen >= 5) {
+            if (HiveUtils.isConstListOI(arguments[4])) {
+                this._userDictObj = HiveUtils.getConstStringArray(arguments[4]);
+            } else if (HiveUtils.isConstString(arguments[4])) {
+                this._userDictObj = HiveUtils.getConstString(arguments[4]);
+            } else {
+                throw new UDFArgumentException(
+                    "User dictionary MUST be given as an array of constant string or constant string (URL)");
+            }
+        }
 
         this._analyzer = null;
 
@@ -106,7 +120,18 @@ public final class KuromojiUDF extends GenericUDF {
     @Override
     public List<Text> evaluate(DeferredObject[] arguments) throws HiveException {
         if (_analyzer == null) {
-            this._analyzer = new JapaneseAnalyzer(_userDict, _mode, _stopWords, _stopTags);
+            Mode mode = tokenizationMode(_modeString);
+            CharArraySet stopWords = stopWords(_stopWordsArray);
+            Set<String> stopTags = stopTags(_stopTagsArray);
+
+            UserDictionary userDict = null;
+            if (_userDictObj instanceof String[]) {
+                userDict = userDictionary((String[]) _userDictObj);
+            } else if (_userDictObj instanceof String) {
+                userDict = userDictionary((String) _userDictObj);
+            }
+
+            this._analyzer = new JapaneseAnalyzer(userDict, mode, stopWords, stopTags);
         }
 
         Object arg0 = arguments[0].get();
@@ -137,9 +162,7 @@ public final class KuromojiUDF extends GenericUDF {
     }
 
     @Nonnull
-    private static Mode tokenizationMode(@Nonnull final ObjectInspector oi)
-            throws UDFArgumentException {
-        final String arg = HiveUtils.getConstString(oi);
+    private static Mode tokenizationMode(@Nullable final String arg) throws UDFArgumentException {
         if (arg == null) {
             return Mode.NORMAL;
         }
@@ -160,12 +183,8 @@ public final class KuromojiUDF extends GenericUDF {
     }
 
     @Nonnull
-    private static CharArraySet stopWords(@Nonnull final ObjectInspector oi)
+    private static CharArraySet stopWords(@Nullable final String[] array)
             throws UDFArgumentException {
-        if (HiveUtils.isVoidOI(oi)) {
-            return JapaneseAnalyzer.getDefaultStopSet();
-        }
-        final String[] array = HiveUtils.getConstStringArray(oi);
         if (array == null) {
             return JapaneseAnalyzer.getDefaultStopSet();
         }
@@ -177,12 +196,7 @@ public final class KuromojiUDF extends GenericUDF {
     }
 
     @Nonnull
-    private static Set<String> stopTags(@Nonnull final ObjectInspector oi)
-            throws UDFArgumentException {
-        if (HiveUtils.isVoidOI(oi)) {
-            return JapaneseAnalyzer.getDefaultStopTags();
-        }
-        final String[] array = HiveUtils.getConstStringArray(oi);
+    private static Set<String> stopTags(@Nullable final String[] array) throws UDFArgumentException {
         if (array == null) {
             return JapaneseAnalyzer.getDefaultStopTags();
         }
@@ -201,19 +215,6 @@ public final class KuromojiUDF extends GenericUDF {
     }
 
     @Nullable
-    private static UserDictionary userDictionary(@Nonnull final ObjectInspector oi)
-            throws UDFArgumentException {
-        if (HiveUtils.isConstListOI(oi)) {
-            return userDictionary(HiveUtils.getConstStringArray(oi));
-        } else if (HiveUtils.isConstString(oi)) {
-            return userDictionary(HiveUtils.getConstString(oi));
-        } else {
-            throw new UDFArgumentException(
-                "User dictionary MUST be given as an array of constant string or constant string (URL)");
-        }
-    }
-
-    @Nullable
     private static UserDictionary userDictionary(@Nullable final String[] userDictArray)
             throws UDFArgumentException {
         if (userDictArray == null) {

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java
----------------------------------------------------------------------
diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java
index afaa485..d185b0d 100644
--- a/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java
+++ b/nlp/src/main/java/hivemall/nlp/tokenizer/SmartcnUDF.java
@@ -27,6 +27,7 @@ import java.util.Arrays;
 import java.util.List;
 
 import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -102,7 +103,7 @@ public final class SmartcnUDF extends GenericUDF {
     }
 
     @Nonnull
-    private static CharArraySet stopWords(@Nonnull final String[] array)
+    private static CharArraySet stopWords(@Nullable final String[] array)
             throws UDFArgumentException {
         if (array == null) {
             return SmartChineseAnalyzer.getDefaultStopSet();

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/test/java/hivemall/TestUtils.java
----------------------------------------------------------------------
diff --git a/nlp/src/test/java/hivemall/TestUtils.java b/nlp/src/test/java/hivemall/TestUtils.java
new file mode 100644
index 0000000..c886c2d
--- /dev/null
+++ b/nlp/src/test/java/hivemall/TestUtils.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall;
+
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hive.com.esotericsoftware.kryo.Kryo;
+import org.apache.hive.com.esotericsoftware.kryo.io.Input;
+import org.apache.hive.com.esotericsoftware.kryo.io.Output;
+
+import javax.annotation.Nonnull;
+import java.io.ByteArrayOutputStream;
+
+public final class TestUtils {
+
+    @Nonnull
+    public static byte[] serializeObjectByKryo(@Nonnull Object obj) {
+        Kryo kryo = getKryo();
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        Output output = new Output(bos);
+        kryo.writeObject(output, obj);
+        output.close();
+        return bos.toByteArray();
+    }
+
+    @Nonnull
+    public static <T> T deserializeObjectByKryo(@Nonnull byte[] in, @Nonnull Class<T> clazz) {
+        Kryo kryo = getKryo();
+        Input inp = new Input(in);
+        T t = kryo.readObject(inp, clazz);
+        inp.close();
+        return t;
+    }
+
+    @Nonnull
+    private static Kryo getKryo() {
+        return Utilities.runtimeSerializationKryo.get();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java
----------------------------------------------------------------------
diff --git a/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java b/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java
index f9acc82..1c3db9f 100644
--- a/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java
+++ b/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java
@@ -22,6 +22,8 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import hivemall.TestUtils;
+
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -33,10 +35,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.io.Text;
 import org.junit.Assert;
 import org.junit.Test;
-import org.objenesis.strategy.StdInstantiatorStrategy;
-
-import com.esotericsoftware.kryo.Kryo;
-import com.esotericsoftware.kryo.io.Output;
 
 public class KuromojiUDFTest {
 
@@ -80,7 +78,7 @@ public class KuromojiUDFTest {
     }
 
     @Test(expected = UDFArgumentException.class)
-    public void testInvalidMode() throws UDFArgumentException, IOException {
+    public void testInvalidMode() throws IOException, HiveException {
         GenericUDF udf = new KuromojiUDF();
         ObjectInspector[] argOIs = new ObjectInspector[2];
         // line
@@ -91,6 +89,18 @@ public class KuromojiUDFTest {
         argOIs[1] = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
             stringType, new Text("unsupported mode"));
         udf.initialize(argOIs);
+
+        DeferredObject[] args = new DeferredObject[1];
+        args[0] = new DeferredObject() {
+            public Text get() throws HiveException {
+                return new Text("クロモジのJapaneseAnalyzerを使ってみる。テスト。");
+            }
+
+            @Override
+            public void prepare(int arg) throws HiveException {}
+        };
+        udf.evaluate(args);
+
         udf.close();
     }
 
@@ -365,16 +375,31 @@ public class KuromojiUDFTest {
     }
 
     @Test
-    public void testSerializeByKryo() throws UDFArgumentException {
+    public void testSerialization() throws IOException, HiveException {
         final KuromojiUDF udf = new KuromojiUDF();
         ObjectInspector[] argOIs = new ObjectInspector[1];
         argOIs[0] = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
         udf.initialize(argOIs);
 
-        Kryo kryo = new Kryo();
-        kryo.setInstantiatorStrategy(new StdInstantiatorStrategy());
-        Output output = new Output(1024 * 16);
-        kryo.writeObject(output, udf);
-        output.close();
+        // serialization after initialization
+        byte[] serialized = TestUtils.serializeObjectByKryo(udf);
+        TestUtils.deserializeObjectByKryo(serialized, KuromojiUDF.class);
+
+        DeferredObject[] args = new DeferredObject[1];
+        args[0] = new DeferredObject() {
+            public Text get() throws HiveException {
+                return new Text("クロモジのJapaneseAnalyzerを使ってみる。テスト。");
+            }
+
+            @Override
+            public void prepare(int arg) throws HiveException {}
+        };
+        List<Text> tokens = udf.evaluate(args);
+
+        // serialization after evaluation
+        serialized = TestUtils.serializeObjectByKryo(udf);
+        TestUtils.deserializeObjectByKryo(serialized, KuromojiUDF.class);
+
+        udf.close();
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b64b94f9/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java
----------------------------------------------------------------------
diff --git a/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java b/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java
index 83ccb0c..342e48a 100644
--- a/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java
+++ b/nlp/src/test/java/hivemall/nlp/tokenizer/SmartcnUDFTest.java
@@ -21,6 +21,8 @@ package hivemall.nlp.tokenizer;
 import java.io.IOException;
 import java.util.List;
 
+import hivemall.TestUtils;
+
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -78,4 +80,33 @@ public class SmartcnUDFTest {
         Assert.assertNotNull(tokens);
         udf.close();
     }
+
+    @Test
+    public void testSerialization() throws IOException, HiveException {
+        final SmartcnUDF udf = new SmartcnUDF();
+        ObjectInspector[] argOIs = new ObjectInspector[1];
+        argOIs[0] = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+        udf.initialize(argOIs);
+
+        // serialization after initialization
+        byte[] serialized = TestUtils.serializeObjectByKryo(udf);
+        TestUtils.deserializeObjectByKryo(serialized, SmartcnUDF.class);
+
+        DeferredObject[] args = new DeferredObject[1];
+        args[0] = new DeferredObject() {
+            public Text get() throws HiveException {
+                return new Text("Smartcn为Apache2.0协议的开源中文分词系统,Java语言编写,修改的中科院计算所ICTCLAS分词系统。");
+            }
+
+            @Override
+            public void prepare(int arg) throws HiveException {}
+        };
+        List<Text> tokens = udf.evaluate(args);
+
+        // serialization after evaluation
+        serialized = TestUtils.serializeObjectByKryo(udf);
+        TestUtils.deserializeObjectByKryo(serialized, SmartcnUDF.class);
+
+        udf.close();
+    }
 }

Reply via email to