Repository: incubator-hivemall
Updated Branches:
  refs/heads/master 79a099e98 -> 9257a3509


Applied refactoring for #145


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/9257a350
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/9257a350
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/9257a350

Branch: refs/heads/master
Commit: 9257a3509bf3fb3bfb34a00e2ebd8d7c3d86f459
Parents: 7a6595c
Author: Makoto Yui <[email protected]>
Authored: Thu Apr 26 15:46:07 2018 +0900
Committer: Makoto Yui <[email protected]>
Committed: Thu Apr 26 15:49:28 2018 +0900

----------------------------------------------------------------------
 .../ftvec/trans/QuantifiedFeaturesUDTF.java     | 20 +++++------
 core/src/test/java/hivemall/TestUtils.java      |  1 +
 .../ftvec/trans/QuantifiedFeaturesUDTFTest.java |  1 +
 .../hivemall/nlp/tokenizer/KuromojiUDF.java     | 36 +++++++++++---------
 .../hivemall/nlp/tokenizer/KuromojiUDFTest.java |  5 +--
 5 files changed, 32 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/9257a350/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java b/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java
index 5b2eefe..c036855 100644
--- a/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java
+++ b/core/src/main/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTF.java
@@ -48,7 +48,8 @@ public final class QuantifiedFeaturesUDTF extends GenericUDTF 
{
     private Identifier<String>[] identifiers;
     private DoubleWritable[] columnValues;
 
-    private Object[] forwardObjs;
+    // lazy instantiation to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.NullPointerException
+    private transient Object[] forwardObjs;
 
     @SuppressWarnings("unchecked")
     @Override
@@ -87,37 +88,32 @@ public final class QuantifiedFeaturesUDTF extends 
GenericUDTF {
 
     @Override
     public void process(Object[] args) throws HiveException {
-        int outputSize = args.length - 1;
         boolean outputRow = boolOI.get(args[0]);
         if (outputRow) {
+            final DoubleWritable[] values = this.columnValues;
             if (forwardObjs == null) {
-                // forwardObjs internally references columnValues
-                List<DoubleWritable> column = new ArrayList<>(outputSize);
-                this.forwardObjs = new Object[] {column};
-                for (int i = 0; i < outputSize; i++) {
-                    column.add(columnValues[i]);
-                }
+                this.forwardObjs = new Object[] {Arrays.asList(values)};
             }
             // updating columnValues simultaneously changes forwardObjs
-            for (int i = 0; i < outputSize; i++) {
+            for (int i = 0, outputSize = args.length - 1; i < outputSize; i++) 
{
                 Object arg = args[i + 1];
                 Identifier<String> identifier = identifiers[i];
                 if (identifier == null) {
                     double v = PrimitiveObjectInspectorUtils.getDouble(arg, 
doubleOIs[i]);
-                    columnValues[i].set(v);
+                    values[i].set(v);
                 } else {
                     if (arg == null) {
                         throw new HiveException("Found Null in the input: " + 
Arrays.toString(args));
                     } else {
                         String k = arg.toString();
                         int id = identifier.valueOf(k);
-                        columnValues[i].set(id);
+                        values[i].set(id);
                     }
                 }
             }
             forward(forwardObjs);
         } else {// load only
-            for (int i = 0; i < outputSize; i++) {
+            for (int i = 0, outputSize = args.length - 1; i < outputSize; i++) 
{
                 Identifier<String> identifier = identifiers[i];
                 if (identifier != null) {
                     Object arg = args[i + 1];

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/9257a350/core/src/test/java/hivemall/TestUtils.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/TestUtils.java 
b/core/src/test/java/hivemall/TestUtils.java
index 7bd1ac3..1f1f1da 100644
--- a/core/src/test/java/hivemall/TestUtils.java
+++ b/core/src/test/java/hivemall/TestUtils.java
@@ -66,6 +66,7 @@ public final class TestUtils {
         udf.close();
     }
 
+    @SuppressWarnings("deprecation")
     public static <T extends GenericUDTF> void testGenericUDTFSerialization(
             @Nonnull Class<T> clazz, @Nonnull ObjectInspector[] ois, @Nonnull 
Object[][] rows)
             throws HiveException {

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/9257a350/core/src/test/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTFTest.java b/core/src/test/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTFTest.java
index 4cfc20e..9d5574f 100644
--- a/core/src/test/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTFTest.java
+++ b/core/src/test/java/hivemall/ftvec/trans/QuantifiedFeaturesUDTFTest.java
@@ -49,6 +49,7 @@ public class QuantifiedFeaturesUDTFTest {
         udtf.setCollector(new Collector() {
             public void collect(Object input) throws HiveException {
                 Object[] row = (Object[]) input;
+                @SuppressWarnings("unchecked")
                 List<DoubleWritable> column = (List<DoubleWritable>) row[0];
                 List<Double> quantifiedInput = new ArrayList<>();
                 for (DoubleWritable elem : column) {

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/9257a350/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
----------------------------------------------------------------------
diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java 
b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
index 745ec30..cdf6dab 100644
--- a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
+++ b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
@@ -72,10 +72,12 @@ public final class KuromojiUDF extends GenericUDF {
     private static final int READ_TIMEOUT_MS = 60000; // 60 sec
     private static final long MAX_INPUT_STREAM_SIZE = 32L * 1024L * 1024L; // 
~32MB
 
-    private String _modeString;
+    private Mode _mode;
+    @Nullable
     private String[] _stopWordsArray;
-    private String[] _stopTagsArray;
-    private Object _userDictObj;
+    private Set<String> _stopTags;
+    @Nullable
+    private Object _userDictObj; // String[] or String
 
     // workaround to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.util.ConcurrentModificationException
     private transient JapaneseAnalyzer _analyzer;
@@ -88,19 +90,15 @@ public final class KuromojiUDF extends GenericUDF {
                     + arglen);
         }
 
-        this._modeString = (arglen >= 2) ? 
HiveUtils.getConstString(arguments[1]) : "NORMAL";
+        this._mode = (arglen >= 2) ? tokenizationMode(arguments[1]) : 
Mode.NORMAL;
 
-        this._stopWordsArray = null;
         if (arglen >= 3 && !HiveUtils.isVoidOI(arguments[2])) {
             this._stopWordsArray = HiveUtils.getConstStringArray(arguments[2]);
         }
 
-        this._stopTagsArray = null;
-        if (arglen >= 4 && !HiveUtils.isVoidOI(arguments[3])) {
-            this._stopTagsArray = HiveUtils.getConstStringArray(arguments[3]);
-        }
+        this._stopTags = (arglen >= 4) ? stopTags(arguments[3])
+                : JapaneseAnalyzer.getDefaultStopTags();
 
-        this._userDictObj = null;
         if (arglen >= 5) {
             if (HiveUtils.isConstListOI(arguments[4])) {
                 this._userDictObj = 
HiveUtils.getConstStringArray(arguments[4]);
@@ -120,9 +118,7 @@ public final class KuromojiUDF extends GenericUDF {
     @Override
     public List<Text> evaluate(DeferredObject[] arguments) throws 
HiveException {
         if (_analyzer == null) {
-            Mode mode = tokenizationMode(_modeString);
             CharArraySet stopWords = stopWords(_stopWordsArray);
-            Set<String> stopTags = stopTags(_stopTagsArray);
 
             UserDictionary userDict = null;
             if (_userDictObj instanceof String[]) {
@@ -131,7 +127,7 @@ public final class KuromojiUDF extends GenericUDF {
                 userDict = userDictionary((String) _userDictObj);
             }
 
-            this._analyzer = new JapaneseAnalyzer(userDict, mode, stopWords, 
stopTags);
+            this._analyzer = new JapaneseAnalyzer(userDict, _mode, stopWords, 
_stopTags);
         }
 
         Object arg0 = arguments[0].get();
@@ -162,7 +158,9 @@ public final class KuromojiUDF extends GenericUDF {
     }
 
     @Nonnull
-    private static Mode tokenizationMode(@Nullable final String arg) throws 
UDFArgumentException {
+    private static Mode tokenizationMode(@Nonnull final ObjectInspector oi)
+            throws UDFArgumentException {
+        String arg = HiveUtils.getConstString(oi);
         if (arg == null) {
             return Mode.NORMAL;
         }
@@ -191,12 +189,16 @@ public final class KuromojiUDF extends GenericUDF {
         if (array.length == 0) {
             return CharArraySet.EMPTY_SET;
         }
-        CharArraySet results = new CharArraySet(Arrays.asList(array), /* 
ignoreCase */true);
-        return results;
+        return new CharArraySet(Arrays.asList(array), /* ignoreCase */true);
     }
 
     @Nonnull
-    private static Set<String> stopTags(@Nullable final String[] array) throws 
UDFArgumentException {
+    private static Set<String> stopTags(@Nonnull final ObjectInspector oi)
+            throws UDFArgumentException {
+        if (HiveUtils.isVoidOI(oi)) {
+            return JapaneseAnalyzer.getDefaultStopTags();
+        }
+        final String[] array = HiveUtils.getConstStringArray(oi);
         if (array == null) {
             return JapaneseAnalyzer.getDefaultStopTags();
         }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/9257a350/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java
----------------------------------------------------------------------
diff --git a/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java 
b/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java
index 1c3db9f..356507d 100644
--- a/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java
+++ b/nlp/src/test/java/hivemall/nlp/tokenizer/KuromojiUDFTest.java
@@ -18,12 +18,12 @@
  */
 package hivemall.nlp.tokenizer;
 
+import hivemall.TestUtils;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
-import hivemall.TestUtils;
-
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -395,6 +395,7 @@ public class KuromojiUDFTest {
             public void prepare(int arg) throws HiveException {}
         };
         List<Text> tokens = udf.evaluate(args);
+        Assert.assertNotNull(tokens);
 
         // serialization after evaluation
         serialized = TestUtils.serializeObjectByKryo(udf);

Reply via email to