incubator-hivemall git commit: [HIVEMALL-188] Avoid KryoException: java.lang.NullPointerException

2018-04-09 Thread myui
Repository: incubator-hivemall
Updated Branches:
  refs/heads/master 1e1b77ea4 -> 9580f0a8a


[HIVEMALL-188] Avoid KryoException: java.lang.NullPointerException

## What changes were proposed in this pull request?

Fix a bug in `tokenize_ja` that occasionally raises `KryoException: java.lang.NullPointerException`

## What type of PR is it?

Bug Fix

## What is the Jira issue?

https://issues.apache.org/jira/browse/HIVEMALL-188

## How was this patch tested?

Manual tests

## Checklist

(Please remove this section if not needed; check `x` for YES, blank for NO)

- [x] Did you apply source code formatter, i.e., `mvn formatter:format`, for your commit?

Author: Takuya Kitazawa 

Closes #142 from takuti/HIVEMALL-188.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/9580f0a8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/9580f0a8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/9580f0a8

Branch: refs/heads/master
Commit: 9580f0a8aba1aff381d7a75620999f61dfd8f3f5
Parents: 1e1b77e
Author: Takuya Kitazawa 
Authored: Tue Apr 10 14:16:13 2018 +0900
Committer: Makoto Yui 
Committed: Tue Apr 10 14:16:13 2018 +0900

--
 nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/9580f0a8/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
--
diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
index 411c89e..384c317 100644
--- a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
+++ b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java
@@ -69,7 +69,10 @@ public final class KuromojiUDF extends GenericUDF {
 private static final long MAX_INPUT_STREAM_SIZE = 32L * 1024L * 1024L; // ~32MB
 
 private Mode _mode;
-private CharArraySet _stopWords;
+
+// lazy instantiation to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.NullPointerException
+private transient CharArraySet _stopWords;
+
 private Set _stopTags;
 private UserDictionary _userDict;
 



[3/3] incubator-hivemall git commit: Merged brickhouse functions #135

2018-04-09 Thread myui
Merged brickhouse functions #135


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/1e1b77ea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/1e1b77ea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/1e1b77ea

Branch: refs/heads/master
Commit: 1e1b77ea4724c48f56dd1f3aa15027506558dee1
Parents: eac4800
Author: Makoto Yui 
Authored: Mon Apr 9 16:04:37 2018 +0900
Committer: Makoto Yui 
Committed: Mon Apr 9 16:04:37 2018 +0900

--
 NOTICE  |   4 +-
 core/pom.xml|   5 +
 .../java/hivemall/common/OnlineVariance.java|  77 --
 .../hivemall/regression/AROWRegressionUDTF.java |   2 +-
 .../PassiveAggressiveRegressionUDTF.java|   2 +-
 .../java/hivemall/sketch/bloom/BloomAndUDF.java |  62 ++
 .../hivemall/sketch/bloom/BloomContainsUDF.java |  71 ++
 .../hivemall/sketch/bloom/BloomFilterUtils.java | 147 
 .../java/hivemall/sketch/bloom/BloomNotUDF.java |  59 ++
 .../java/hivemall/sketch/bloom/BloomOrUDF.java  |  62 ++
 .../java/hivemall/sketch/bloom/BloomUDAF.java   | 101 +++
 .../hivemall/statistics/MovingAverageUDTF.java  |  84 +++
 .../main/java/hivemall/tools/TryCastUDF.java|  82 +++
 .../hivemall/tools/array/ArrayAppendUDF.java| 103 +++
 .../hivemall/tools/array/ArrayElementAtUDF.java |  80 +++
 .../hivemall/tools/array/ArrayFlattenUDF.java   | 111 +++
 .../hivemall/tools/array/ArraySliceUDF.java | 141 
 .../hivemall/tools/array/ArrayUnionUDF.java | 112 +++
 .../tools/array/ConditionalEmitUDTF.java| 128 
 .../hivemall/tools/array/FirstElementUDF.java   |  68 ++
 .../hivemall/tools/array/LastElementUDF.java|  70 ++
 .../java/hivemall/tools/array/SubarrayUDF.java  |  48 --
 .../java/hivemall/tools/json/FromJsonUDF.java   | 148 
 .../java/hivemall/tools/json/ToJsonUDF.java |  94 +++
 .../java/hivemall/tools/sanity/AssertUDF.java   |  46 ++
 .../hivemall/tools/sanity/RaiseErrorUDF.java|  38 +
 .../hivemall/tools/vector/VectorAddUDF.java | 139 
 .../hivemall/tools/vector/VectorDotUDF.java | 178 +
 .../utils/collections/DoubleRingBuffer.java |   4 +
 .../java/hivemall/utils/hadoop/HiveUtils.java   |  80 ++-
 .../hivemall/utils/hadoop/JsonSerdeUtils.java   | 715 +++
 .../hivemall/utils/hadoop/WritableUtils.java|  27 +
 .../utils/hashing/HashFunctionFactory.java  |   1 -
 .../java/hivemall/utils/lang/StringUtils.java   |  16 +-
 .../hivemall/utils/stats/MovingAverage.java |  74 ++
 .../hivemall/utils/stats/OnlineVariance.java|  77 ++
 .../hivemall/common/OnlineVarianceTest.java |  89 ---
 .../hivemall/sketch/bloom/BloomAndUDFTest.java  |  89 +++
 .../sketch/bloom/BloomContainsUDFTest.java  |  71 ++
 .../sketch/bloom/BloomFilterUtilsTest.java  |  78 ++
 .../hivemall/sketch/bloom/BloomNotUDFTest.java  |  67 ++
 .../hivemall/sketch/bloom/BloomOrUDFTest.java   |  89 +++
 .../statistics/MovingAverageUDTFTest.java   |  68 ++
 .../java/hivemall/tools/TryCastUDFTest.java |  59 ++
 .../tools/array/ArrayAppendUDFTest.java | 106 +++
 .../tools/array/ArrayElementAtUDFTest.java  |  86 +++
 .../tools/array/ArrayFlattenUDFTest.java|  56 ++
 .../hivemall/tools/array/ArraySliceUDFTest.java | 119 +++
 .../hivemall/tools/array/ArrayUnionUDFTest.java |  65 ++
 .../tools/array/ConditionalEmitUDTFTest.java|  70 ++
 .../tools/array/FirstElementUDFTest.java|  66 ++
 .../tools/array/LastElementUDFTest.java |  66 ++
 .../hivemall/tools/json/FromJsonUDFTest.java|  82 +++
 .../java/hivemall/tools/json/ToJsonUDFTest.java |  52 ++
 .../hivemall/tools/sanity/AssertUDFTest.java|  39 +
 .../tools/sanity/RaiseErrorUDFTest.java |  32 +
 .../hivemall/tools/vector/VectorAddUDFTest.java |  85 +++
 .../hivemall/tools/vector/VectorDotUDFTest.java |  83 +++
 .../utils/collections/DoubleRingBufferTest.java |  24 +
 .../utils/hadoop/JsonSerdeUtilsTest.java| 365 ++
 .../hivemall/utils/stats/MovingAverageTest.java |  53 ++
 .../utils/stats/OnlineVarianceTest.java |  91 +++
 pom.xml |   6 +
 resources/ddl/define-all-as-permanent.hive  |   6 +-
 resources/ddl/define-all.hive   |   6 +-
 resources/ddl/define-all.spark  |   5 +-
 resources/ddl/define-udfs.td.hql|   3 +-
 .../org/apache/spark/sql/hive/HivemallOps.scala |  20 +-
 68 files changed, 5201 insertions(+), 251 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/NOTICE
--
diff --git a/NOTICE b/NOTICE
index 34b5f5d..385a198 100644

[2/3] incubator-hivemall git commit: Merged brickhouse functions #135

2018-04-09 Thread myui
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/main/java/hivemall/tools/sanity/RaiseErrorUDF.java
--
diff --git a/core/src/main/java/hivemall/tools/sanity/RaiseErrorUDF.java b/core/src/main/java/hivemall/tools/sanity/RaiseErrorUDF.java
new file mode 100644
index 000..194085c
--- /dev/null
+++ b/core/src/main/java/hivemall/tools/sanity/RaiseErrorUDF.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.sanity;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+
+@Description(name = "raise_error", value = "_FUNC_() or _FUNC_(string msg) - Throws an error")
+@UDFType(deterministic = true, stateful = false)
+public final class RaiseErrorUDF extends UDF {
+
+public boolean evaluate() throws HiveException {
+throw new HiveException();
+}
+
+public boolean evaluate(String errorMessage) throws HiveException {
+throw new HiveException(errorMessage);
+}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/main/java/hivemall/tools/vector/VectorAddUDF.java
--
diff --git a/core/src/main/java/hivemall/tools/vector/VectorAddUDF.java b/core/src/main/java/hivemall/tools/vector/VectorAddUDF.java
new file mode 100644
index 000..8442ae3
--- /dev/null
+++ b/core/src/main/java/hivemall/tools/vector/VectorAddUDF.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.vector;
+
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.lang.StringUtils;
+
+import java.util.Arrays;
+import java.util.List;
+
+import javax.annotation.Nonnegative;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+@Description(name = "vector_add",
+value = "_FUNC_(array x, array y) - Perform vector ADD operation.")
+@UDFType(deterministic = true, stateful = false)
+public final class VectorAddUDF extends GenericUDF {
+
+private ListObjectInspector xOI, yOI;
+private PrimitiveObjectInspector xElemOI, yElemOI;
+private boolean floatingPoints;
+
+@Override
+public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
+if (argOIs.length != 2) {
+throw new UDFArgumentLengthException("Expected 2 arguments, but 
got " +