incubator-hivemall git commit: [HIVEMALL-188] Avoid KryoException: java.lang.NullPointerException
Repository: incubator-hivemall Updated Branches: refs/heads/master 1e1b77ea4 -> 9580f0a8a [HIVEMALL-188] Avoid KryoException: java.lang.NullPointerException ## What changes were proposed in this pull request? Fix a bug in `tokenize_ja` that occasionally raises `KryoException: java.lang.NullPointerException` ## What type of PR is it? Bug Fix ## What is the Jira issue? https://issues.apache.org/jira/browse/HIVEMALL-188 ## How was this patch tested? Manual tests ## Checklist (Please remove this section if not needed; check `x` for YES, blank for NO) - [x] Did you apply source code formatter, i.e., `mvn formatter:format`, for your commit? Author: Takuya Kitazawa Closes #142 from takuti/HIVEMALL-188. Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/9580f0a8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/9580f0a8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/9580f0a8 Branch: refs/heads/master Commit: 9580f0a8aba1aff381d7a75620999f61dfd8f3f5 Parents: 1e1b77e Author: Takuya Kitazawa Authored: Tue Apr 10 14:16:13 2018 +0900 Committer: Makoto Yui Committed: Tue Apr 10 14:16:13 2018 +0900 -- nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/9580f0a8/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java -- diff --git a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java index 411c89e..384c317 100644 --- a/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java +++ b/nlp/src/main/java/hivemall/nlp/tokenizer/KuromojiUDF.java @@ -69,7 +69,10 @@ public final class KuromojiUDF extends GenericUDF { private static final long MAX_INPUT_STREAM_SIZE = 32L * 1024L * 1024L; // ~32MB private Mode _mode; -private CharArraySet _stopWords; + +// lazy 
instantiation to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.NullPointerException +private transient CharArraySet _stopWords; + private Set _stopTags; private UserDictionary _userDict;
[3/3] incubator-hivemall git commit: Merged brickhouse functions #135
Merged brickhouse functions #135 Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/1e1b77ea Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/1e1b77ea Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/1e1b77ea Branch: refs/heads/master Commit: 1e1b77ea4724c48f56dd1f3aa15027506558dee1 Parents: eac4800 Author: Makoto Yui Authored: Mon Apr 9 16:04:37 2018 +0900 Committer: Makoto Yui Committed: Mon Apr 9 16:04:37 2018 +0900 -- NOTICE | 4 +- core/pom.xml| 5 + .../java/hivemall/common/OnlineVariance.java| 77 -- .../hivemall/regression/AROWRegressionUDTF.java | 2 +- .../PassiveAggressiveRegressionUDTF.java| 2 +- .../java/hivemall/sketch/bloom/BloomAndUDF.java | 62 ++ .../hivemall/sketch/bloom/BloomContainsUDF.java | 71 ++ .../hivemall/sketch/bloom/BloomFilterUtils.java | 147 .../java/hivemall/sketch/bloom/BloomNotUDF.java | 59 ++ .../java/hivemall/sketch/bloom/BloomOrUDF.java | 62 ++ .../java/hivemall/sketch/bloom/BloomUDAF.java | 101 +++ .../hivemall/statistics/MovingAverageUDTF.java | 84 +++ .../main/java/hivemall/tools/TryCastUDF.java| 82 +++ .../hivemall/tools/array/ArrayAppendUDF.java| 103 +++ .../hivemall/tools/array/ArrayElementAtUDF.java | 80 +++ .../hivemall/tools/array/ArrayFlattenUDF.java | 111 +++ .../hivemall/tools/array/ArraySliceUDF.java | 141 .../hivemall/tools/array/ArrayUnionUDF.java | 112 +++ .../tools/array/ConditionalEmitUDTF.java| 128 .../hivemall/tools/array/FirstElementUDF.java | 68 ++ .../hivemall/tools/array/LastElementUDF.java| 70 ++ .../java/hivemall/tools/array/SubarrayUDF.java | 48 -- .../java/hivemall/tools/json/FromJsonUDF.java | 148 .../java/hivemall/tools/json/ToJsonUDF.java | 94 +++ .../java/hivemall/tools/sanity/AssertUDF.java | 46 ++ .../hivemall/tools/sanity/RaiseErrorUDF.java| 38 + .../hivemall/tools/vector/VectorAddUDF.java | 139 .../hivemall/tools/vector/VectorDotUDF.java | 178 + 
.../utils/collections/DoubleRingBuffer.java | 4 + .../java/hivemall/utils/hadoop/HiveUtils.java | 80 ++- .../hivemall/utils/hadoop/JsonSerdeUtils.java | 715 +++ .../hivemall/utils/hadoop/WritableUtils.java| 27 + .../utils/hashing/HashFunctionFactory.java | 1 - .../java/hivemall/utils/lang/StringUtils.java | 16 +- .../hivemall/utils/stats/MovingAverage.java | 74 ++ .../hivemall/utils/stats/OnlineVariance.java| 77 ++ .../hivemall/common/OnlineVarianceTest.java | 89 --- .../hivemall/sketch/bloom/BloomAndUDFTest.java | 89 +++ .../sketch/bloom/BloomContainsUDFTest.java | 71 ++ .../sketch/bloom/BloomFilterUtilsTest.java | 78 ++ .../hivemall/sketch/bloom/BloomNotUDFTest.java | 67 ++ .../hivemall/sketch/bloom/BloomOrUDFTest.java | 89 +++ .../statistics/MovingAverageUDTFTest.java | 68 ++ .../java/hivemall/tools/TryCastUDFTest.java | 59 ++ .../tools/array/ArrayAppendUDFTest.java | 106 +++ .../tools/array/ArrayElementAtUDFTest.java | 86 +++ .../tools/array/ArrayFlattenUDFTest.java| 56 ++ .../hivemall/tools/array/ArraySliceUDFTest.java | 119 +++ .../hivemall/tools/array/ArrayUnionUDFTest.java | 65 ++ .../tools/array/ConditionalEmitUDTFTest.java| 70 ++ .../tools/array/FirstElementUDFTest.java| 66 ++ .../tools/array/LastElementUDFTest.java | 66 ++ .../hivemall/tools/json/FromJsonUDFTest.java| 82 +++ .../java/hivemall/tools/json/ToJsonUDFTest.java | 52 ++ .../hivemall/tools/sanity/AssertUDFTest.java| 39 + .../tools/sanity/RaiseErrorUDFTest.java | 32 + .../hivemall/tools/vector/VectorAddUDFTest.java | 85 +++ .../hivemall/tools/vector/VectorDotUDFTest.java | 83 +++ .../utils/collections/DoubleRingBufferTest.java | 24 + .../utils/hadoop/JsonSerdeUtilsTest.java| 365 ++ .../hivemall/utils/stats/MovingAverageTest.java | 53 ++ .../utils/stats/OnlineVarianceTest.java | 91 +++ pom.xml | 6 + resources/ddl/define-all-as-permanent.hive | 6 +- resources/ddl/define-all.hive | 6 +- resources/ddl/define-all.spark | 5 +- resources/ddl/define-udfs.td.hql| 3 +- 
.../org/apache/spark/sql/hive/HivemallOps.scala | 20 +- 68 files changed, 5201 insertions(+), 251 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/NOTICE -- diff --git a/NOTICE b/NOTICE index 34b5f5d..385a198 100644
[2/3] incubator-hivemall git commit: Merged brickhouse functions #135
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/main/java/hivemall/tools/sanity/RaiseErrorUDF.java -- diff --git a/core/src/main/java/hivemall/tools/sanity/RaiseErrorUDF.java b/core/src/main/java/hivemall/tools/sanity/RaiseErrorUDF.java new file mode 100644 index 000..194085c --- /dev/null +++ b/core/src/main/java/hivemall/tools/sanity/RaiseErrorUDF.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.tools.sanity; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; + +@Description(name = "raise_error", value = "_FUNC_() or _FUNC_(string msg) - Throws an error") +@UDFType(deterministic = true, stateful = false) +public final class RaiseErrorUDF extends UDF { + +public boolean evaluate() throws HiveException { +throw new HiveException(); +} + +public boolean evaluate(String errorMessage) throws HiveException { +throw new HiveException(errorMessage); +} + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/main/java/hivemall/tools/vector/VectorAddUDF.java -- diff --git a/core/src/main/java/hivemall/tools/vector/VectorAddUDF.java b/core/src/main/java/hivemall/tools/vector/VectorAddUDF.java new file mode 100644 index 000..8442ae3 --- /dev/null +++ b/core/src/main/java/hivemall/tools/vector/VectorAddUDF.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.tools.vector; + +import hivemall.utils.hadoop.HiveUtils; +import hivemall.utils.lang.StringUtils; + +import java.util.Arrays; +import java.util.List; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; + +@Description(name = "vector_add", +value = "_FUNC_(array x, array y) - Perform vector ADD operation.") +@UDFType(deterministic = true, stateful = false) +public final class VectorAddUDF extends GenericUDF { + +private ListObjectInspector xOI, yOI; +private PrimitiveObjectInspector xElemOI, yElemOI; +private boolean floatingPoints; + +@Override +public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { +if (argOIs.length != 2) { +throw new UDFArgumentLengthException("Expected 2 arguments, but got " +