Close #131: [v0.5.0-rc3] Merge v0.5.0 branch
Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/3a718713 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/3a718713 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/3a718713 Branch: refs/heads/master Commit: 3a718713afb612848312afae74301ec2cbf1d8a2 Parents: 448847f Author: Makoto Yui <[email protected]> Authored: Tue Feb 20 16:17:51 2018 +0900 Committer: Makoto Yui <[email protected]> Committed: Tue Feb 20 16:17:51 2018 +0900 ---------------------------------------------------------------------- .gitignore | 1 + .rat-excludes | 6 +- LICENSE | 120 +++++++ NOTICE | 82 +---- VERSION | 2 +- bin/build.sh | 1 - bin/build_xgboost.sh | 87 ----- bin/maven_central_release.sh | 33 -- core/pom.xml | 112 +------ .../main/java/hivemall/HivemallConstants.java | 2 +- .../hivemall/ftvec/ranking/BprSamplingUDTF.java | 38 ++- .../smile/classification/DecisionTree.java | 24 +- .../smile/regression/RegressionTree.java | 20 +- .../hivemall/smile/tools/TreePredictUDF.java | 63 +++- .../main/java/hivemall/utils/codec/Base91.java | 20 +- .../hivemall/utils/lang/ExceptionUtils.java | 118 +++++++ .../main/java/hivemall/utils/math/FastMath.java | 67 ++-- .../main/resources/META-INF/LICENSE-jafama.txt | 202 +++++++++++ .../main/resources/META-INF/LICENSE-smile.txt | 203 +++++++++++ dist/pom.xml | 163 +++++++++ docs/gitbook/binaryclass/news20_rf.md | 5 +- docs/gitbook/binaryclass/titanic_rf.md | 10 +- docs/gitbook/multiclass/iris_randomforest.md | 8 +- mixserv/pom.xml | 43 +-- nlp/pom.xml | 132 +------- .../hivemall/nlp/tokenizer/KuromojiUDF.java | 52 +-- pom.xml | 318 ++++++++++-------- spark/common/pom.xml | 64 ++++ ...isticRegressionDataGeneratorUDTFWrapper.java | 109 ++++++ .../java/hivemall/ftvec/AddBiasUDFWrapper.java | 83 +++++ .../ftvec/AddFeatureIndexUDFWrapper.java | 85 +++++ .../ftvec/ExtractFeatureUDFWrapper.java | 73 ++++ 
.../hivemall/ftvec/ExtractWeightUDFWrapper.java | 73 ++++ .../hivemall/ftvec/SortByFeatureUDFWrapper.java | 92 +++++ .../scaling/L2NormalizationUDFWrapper.java | 95 ++++++ .../hivemall/knn/lsh/MinHashesUDFWrapper.java | 93 ++++++ .../hivemall/tools/mapred/RowIdUDFWrapper.java | 72 ++++ .../main/scala/hivemall/HivemallException.scala | 25 ++ .../spark/ml/feature/HivemallLabeledPoint.scala | 82 +++++ spark/pom.xml | 295 ++++++++++++++++ spark/scalastyle-config.xml | 333 +++++++++++++++++++ spark/spark-2.0/pom.xml | 147 +------- .../spark/streaming/HivemallStreamingOps.scala | 47 +++ .../apache/spark/sql/hive/HiveUdfSuite.scala | 2 +- .../spark/sql/hive/HivemallOpsSuite.scala | 2 +- spark/spark-2.1/pom.xml | 145 +------- .../spark/streaming/HivemallStreamingOps.scala | 47 +++ .../apache/spark/sql/hive/HiveUdfSuite.scala | 2 +- .../spark/sql/hive/HivemallOpsSuite.scala | 2 +- spark/spark-2.2/pom.xml | 167 ++-------- .../spark/sql/hive/HivemallGroupedDataset.scala | 2 +- .../spark/streaming/HivemallStreamingOps.scala | 47 +++ .../apache/spark/sql/hive/HiveUdfSuite.scala | 2 +- .../spark/sql/hive/HivemallOpsSuite.scala | 2 +- spark/spark-common/pom.xml | 146 -------- spark/spark-common/scalastyle-config.xml | 333 ------------------- ...isticRegressionDataGeneratorUDTFWrapper.java | 109 ------ .../java/hivemall/ftvec/AddBiasUDFWrapper.java | 83 ----- .../ftvec/AddFeatureIndexUDFWrapper.java | 85 ----- .../ftvec/ExtractFeatureUDFWrapper.java | 73 ---- .../hivemall/ftvec/ExtractWeightUDFWrapper.java | 73 ---- .../hivemall/ftvec/SortByFeatureUDFWrapper.java | 92 ----- .../scaling/L2NormalizationUDFWrapper.java | 95 ------ .../hivemall/knn/lsh/MinHashesUDFWrapper.java | 93 ------ .../hivemall/tools/mapred/RowIdUDFWrapper.java | 72 ---- .../main/scala/hivemall/HivemallException.scala | 25 -- .../spark/ml/feature/HivemallLabeledPoint.scala | 82 ----- .../spark/streaming/HivemallStreamingOps.scala | 47 --- src/site/resources/LICENSE-font_awesome.txt | 86 +++++ 
xgboost/lib/xgboost4j-0.60-0.10.jar | Bin 1424975 -> 0 bytes xgboost/pom.xml | 143 +------- .../java/hivemall/xgboost/XGBoostUtils.java | 4 +- .../tools/XGBoostMulticlassPredictUDTF.java | 15 +- 73 files changed, 3023 insertions(+), 2753 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/.gitignore ---------------------------------------------------------------------- diff --git a/.gitignore b/.gitignore index 84b63c8..3ba5593 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,4 @@ release.properties \#*# pom.xml.next pom.xml.tag +.cache-main http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/.rat-excludes ---------------------------------------------------------------------- diff --git a/.rat-excludes b/.rat-excludes index fb6ce1c..fcb4b31 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -26,4 +26,8 @@ resources/eclipse-style.xml **/*.spark **/*.hql docs/gitbook/_book/** -docs/gitbook/node_modules/** \ No newline at end of file +docs/gitbook/node_modules/** +**/release.properties +**/derby.log +**/LICENSE-*.txt +**/Base91.java http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/LICENSE ---------------------------------------------------------------------- diff --git a/LICENSE b/LICENSE index f433b1a..26b11dd 100644 --- a/LICENSE +++ b/LICENSE @@ -175,3 +175,123 @@ of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +APACHE HIVEMALL SUBCOMPONENTS: + +The Apache Hivemall project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for these +subcomponents is subject to the terms and conditions of the following +licenses. + +--------------------------------------------------------------------------- +The Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +--------------------------------------------------------------------------- + +This product bundles a modified version of 'Smile' which is licensed +under the Apache License Version 2.0, specifically for Random Forest module. +For details, see https://github.com/haifengl/smile/ + + You can find a copy of the License at + + core/src/main/resources/META-INF/LICENSE-smile.txt + + which is placed under META-INF/ in a jar. + +This product bundles a modified version of `Jafama` which is licensed +under the Apache License Version 2.0, specifically for FastMath.java. +For details, see https://github.com/jeffhain/jafama/ + + You can find a copy of the License at + + core/src/main/resources/META-INF/LICENSE-jafama.txt + + which is placed under META-INF/ in a jar. 
+ +--------------------------------------------------------------------------- + The 3-Clause BSD License (https://opensource.org/licenses/BSD-3-Clause) +--------------------------------------------------------------------------- + +This product bundles a modified version of Joachim Henke's `Base91 +Encoder/Decoder` which is licensed under the BSD 3-Clause License, +specifically for Base91.java. +For details, see https://github.com/bwaldvogel/base91 + + Copyright (c) 2000-2006 Joachim Henke + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + - Neither the name of Joachim Henke nor the names of his contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ +--------------------------------------------------------------------------- + Public Domain License +--------------------------------------------------------------------------- + +This product bundles public domain software derived from `fdlibm`, +specifically for FastMath.java. +For details, see http://www.netlib.org/fdlibm/fdlibm.h + + Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + + Developed at SunSoft, a Sun Microsystems, Inc. business. + Permission to use, copy, modify, and distribute this + software is freely granted, provided that this notice + is preserved. + +--------------------------------------------------------------------------- + The SIL Open Font License (https://opensource.org/licenses/OFL-1.1) +--------------------------------------------------------------------------- + +This product bundles `Font-awesome` fonts which is licensed under the +SIL Open Font License (OFL) 1.1, specifically for the project site. +For details, see http://fontawesome.io/ + + You can find a copy of the License at + + src/site/resources/LICENSE-font_awesome.txt http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/NOTICE ---------------------------------------------------------------------- diff --git a/NOTICE b/NOTICE index bfc4af8..34b5f5d 100644 --- a/NOTICE +++ b/NOTICE @@ -1,81 +1,13 @@ Apache Hivemall -Copyright 2016 and onwards The Apache Software Foundation +Copyright 2016-2018 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). -Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) -Copyright (C) 2015-2016 Makoto Yui -Copyright (C) 2015-2016 Treasure Data, Inc. +This product is based on source code originally developed by AIST and Treasure Data, Inc. 
+They have been licensed to the Apache Software Foundation under Software Grant Agreements from +the following individuals and organizations: ------------------------------------------------------------------------------------------------------- -Copyright notifications which have been relocated from source files - -o hivemall/core/src/main/java/hivemall/smile/classification/DecisionTree.java - hivemall/core/src/main/java/hivemall/smile/regression/RegressionTree.java - - Copyright (c) 2010 Haifeng Li - - https://github.com/haifengl/smile - Licensed under the Apache License, Version 2.0 - -o hivemall/core/src/main/java/hivemall/utils/codec/Base91.java - - Copyright (c) 2000-2006 Joachim Henke - - https://github.com/bwaldvogel/base91 - Licensed under the BSD 3-Clause License - -o hivemall/core/src/main/java/hivemall/utils/collections/OpenHashMap.java - - Copyright (C) 2010 catchpole.net - - https://github.com/slipperyseal/atomicobjects/ - Licensed under the Apache License, Version 2.0 - -o hivemall/core/src/main/java/hivemall/utils/math/FastMath.java - - Copyright 2012-2015 Jeff Hain - - https://github.com/jeffhain/jafama/ - Licensed under the Apache License, Version 2.0 - - Copyright (C) 1993 by Sun Microsystems, Inc. - - Permission to use, copy, modify, and distribute this software is freely granted, provided that this notice is preserved. - ------------------------------------------------------------------------------------------------------- -Copyright notifications which have been relocated from ASF projects - -o hivemall/core/src/main/java/hivemall/utils/math/MathUtils.java#erfInv() - - Copyright (C) 2003-2016 The Apache Software Foundation. 
- - http://commons.apache.org/proper/commons-math/ - Licensed under the Apache License, Version 2.0 - -o hivemall/core/src/main/java/hivemall/utils/buffer/DynamicByteArray.java - - Copyright 2013-2015 The Apache Software Foundation - - https://orc.apache.org/ - Licensed under the Apache License, Version 2.0 - -o hivemall/spark/spark-2.0/extra-src/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala - hivemall/spark/spark-2.0/src/test/scala/org/apache/spark/sql/QueryTest.scala - hivemall/spark/spark-2.0/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala - hivemall/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala - hivemall/spark/spark-2.1/extra-src/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala - hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/QueryTest.scala - hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala - hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala - hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala - hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala - hivemall/spark/spark-2.1/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkBase.scala - - Copyright (C) 2014-2017 The Apache Software Foundation. - - http://spark.apache.org/ - Licensed under the Apache License, Version 2.0 - - \ No newline at end of file + - Copyright 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) + - Copyright 2015-2016 Makoto Yui + - Copyright 2015-2016 Treasure Data, Inc. 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/VERSION ---------------------------------------------------------------------- diff --git a/VERSION b/VERSION index 17de5ad..89b6d66 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.5.0-incubating-SNAPSHOT +0.5.1-incubating-SNAPSHOT http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/bin/build.sh ---------------------------------------------------------------------- diff --git a/bin/build.sh b/bin/build.sh index 8487d70..05d1f8f 100755 --- a/bin/build.sh +++ b/bin/build.sh @@ -30,5 +30,4 @@ if [ "$HIVEMALL_HOME" = "" ]; then fi cd $HIVEMALL_HOME -mvn validate -Pxgboost mvn clean package -Dskiptests=true -Dmaven.test.skip=true http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/bin/build_xgboost.sh ---------------------------------------------------------------------- diff --git a/bin/build_xgboost.sh b/bin/build_xgboost.sh deleted file mode 100755 index 0bebcf8..0000000 --- a/bin/build_xgboost.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -# xgboost requires g++-4.6 or higher (https://github.com/dmlc/xgboost/blob/master/doc/build.md), -# so we need to first check if the requirement is satisfied. -COMPILER_REQUIRED_VERSION="4.6" -COMPILER_VERSION=`g++ --version 2> /dev/null` - -# Check if GNU g++ installed -if [ $? = 127 ]; then - echo "First, you need to install g++" - exit 1 -elif [[ "$COMPILER_VERSION" = *LLVM* ]]; then - echo "You must use GNU g++, but the detected compiler was clang++" - exit 1 -fi - -COMPILER_VERSION_NUMBER=`echo $COMPILER_VERSION | grep ^g++ | \ - awk 'match($0, /[0-9]+\.[0-9]+\.[0-9]+/) {print substr($0, RSTART, RLENGTH)}'` - -# See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers -function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; } -if [ $(version $COMPILER_VERSION_NUMBER) -lt $(version $COMPILER_REQUIRED_VERSION) ]; then - echo "You must compile xgboost with GNU g++-$COMPILER_REQUIRED_VERSION or higher," \ - "but the detected compiler was g++-$COMPILER_VERSION_NUMBER" - exit 1 -fi - -# Target commit hash value -XGBOOST_HASHVAL='7ab15a0b31c870c7779691639f521df3ccd4a56e' - -# Move to a top directory -if [ "$HIVEMALL_HOME" = "" ]; then - if [ -e ../bin/${0##*/} ]; then - HIVEMALL_HOME=`pwd`/.. 
- elif [ -e ./bin/${0##*/} ]; then - HIVEMALL_HOME=`pwd` - else - echo "env HIVEMALL_HOME not defined" - exit 1 - fi -fi - -cd $HIVEMALL_HOME - -# Final output dir for a custom-compiled xgboost binary -HIVEMALL_LIB_DIR="$HIVEMALL_HOME/xgboost/src/main/resources/lib/" -rm -rf $HIVEMALL_LIB_DIR >> /dev/null -mkdir -p $HIVEMALL_LIB_DIR - -# Move to an output directory -XGBOOST_OUT="$HIVEMALL_HOME/target/xgboost-$XGBOOST_HASHVAL" -rm -rf $XGBOOST_OUT >> /dev/null -mkdir -p $XGBOOST_OUT -cd $XGBOOST_OUT - -# Fetch xgboost sources -git clone --progress https://github.com/maropu/xgboost.git -cd xgboost -git checkout $XGBOOST_HASHVAL - -# Resolve dependent sources -git submodule init -git submodule update - -# Copy a built binary to the output -cd jvm-packages -ENABLE_STATIC_LINKS=1 ./create_jni.sh -cp ./lib/libxgboost4j.* "$HIVEMALL_LIB_DIR" - http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/bin/maven_central_release.sh ---------------------------------------------------------------------- diff --git a/bin/maven_central_release.sh b/bin/maven_central_release.sh deleted file mode 100755 index 8a7918f..0000000 --- a/bin/maven_central_release.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/sh -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# - -if [ "$HIVEMALL_HOME" = "" ]; then - if [ -e ../bin/${0##*/} ]; then - HIVEMALL_HOME=".." - elif [ -e ./bin/${0##*/} ]; then - HIVEMALL_HOME="." - else - echo "env HIVEMALL_HOME not defined" - exit 1 - fi -fi - -cd $HIVEMALL_HOME -mvn clean deploy -DperformRelease=true -Dskiptests=true -Dmaven.test.skip=true http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/pom.xml ---------------------------------------------------------------------- diff --git a/core/pom.xml b/core/pom.xml index c79124a..82cb369 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -16,14 +16,13 @@ specific language governing permissions and limitations under the License. --> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.apache.hivemall</groupId> <artifactId>hivemall</artifactId> - <version>0.5.0-incubating-SNAPSHOT</version> + <version>0.5.1-incubating-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> @@ -40,67 +39,41 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-core</artifactId> - <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> - <version>${hive.version}</version> <scope>provided</scope> - <exclusions> - <exclusion> 
- <artifactId>jetty</artifactId> - <groupId>org.mortbay.jetty</groupId> - </exclusion> - <exclusion> - <groupId>javax.jdo</groupId> - <artifactId>jdo2-api</artifactId> - </exclusion> - <exclusion> - <groupId>asm-parent</groupId> - <artifactId>asm-parent</artifactId> - </exclusion> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - </exclusions> </dependency> <dependency> <groupId>commons-cli</groupId> <artifactId>commons-cli</artifactId> - <version>1.2</version> <scope>provided</scope> </dependency> <dependency> <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> - <version>1.0.4</version> <scope>provided</scope> </dependency> <dependency> <groupId>log4j</groupId> <artifactId>log4j</artifactId> - <version>1.2.17</version> <scope>provided</scope> </dependency> <dependency> <groupId>javax.jdo</groupId> <artifactId>jdo2-api</artifactId> - <version>2.3-eb</version> <scope>provided</scope> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> - <version>${guava.version}</version> <scope>provided</scope> </dependency> @@ -160,104 +133,23 @@ <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> - <version>${junit.version}</version> <scope>test</scope> </dependency> <dependency> <groupId>org.mockito</groupId> <artifactId>mockito-core</artifactId> - <version>1.10.19</version> <scope>test</scope> </dependency> <dependency> <groupId>org.powermock</groupId> <artifactId>powermock-module-junit4</artifactId> - <version>1.6.3</version> <scope>test</scope> </dependency> <dependency> <groupId>org.powermock</groupId> <artifactId>powermock-api-mockito</artifactId> - <version>1.6.3</version> <scope>test</scope> </dependency> </dependencies> - <build> - <directory>target</directory> - <outputDirectory>target/classes</outputDirectory> - <finalName>${project.artifactId}-${project.version}</finalName> - <testOutputDirectory>target/test-classes</testOutputDirectory> - <plugins> 
- <!-- hivemall-core-xx.jar --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <version>2.5</version> - <configuration> - <finalName>${project.artifactId}-${project.version}</finalName> - <outputDirectory>${project.parent.build.directory}</outputDirectory> - </configuration> - </plugin> - <!-- hivemall-core-xx-with-dependencies.jar including minimum dependencies --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-shade-plugin</artifactId> - <version>3.1.0</version> - <executions> - <execution> - <id>jar-with-dependencies</id> - <phase>package</phase> - <goals> - <goal>shade</goal> - </goals> - <configuration> - <finalName>${project.artifactId}-${project.version}-with-dependencies</finalName> - <outputDirectory>${project.parent.build.directory}</outputDirectory> - <minimizeJar>true</minimizeJar> - <createDependencyReducedPom>false</createDependencyReducedPom> - <artifactSet> - <includes> - <include>io.netty:netty-all</include> - <include>com.github.haifengl:smile-core</include> - <include>com.github.haifengl:smile-math</include> - <include>com.github.haifengl:smile-data</include> - <include>org.tukaani:xz</include> - <include>org.apache.commons:commons-math3</include> - <include>org.roaringbitmap:RoaringBitmap</include> - <include>it.unimi.dsi:fastutil</include> - <include>com.clearspring.analytics:stream</include> - </includes> - </artifactSet> - <transformers> - <transformer - implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> - <manifestEntries> - <Implementation-Title>${project.name}</Implementation-Title> - <Implementation-Version>${project.version}</Implementation-Version> - <Implementation-Vendor>${project.organization.name}</Implementation-Vendor> - </manifestEntries> - </transformer> - <!-- - <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"> - <addHeader>false</addHeader> - 
</transformer> - --> - </transformers> - <filters> - <filter> - <artifact>*:*</artifact> - <excludes> - <exclude>META-INF/LICENSE.txt</exclude> - <exclude>META-INF/NOTICE.txt</exclude> - </excludes> - </filter> - </filters> - </configuration> - </execution> - </executions> - </plugin> - </plugins> - </build> - </project> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/HivemallConstants.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/HivemallConstants.java b/core/src/main/java/hivemall/HivemallConstants.java index 5e6e407..955aeb1 100644 --- a/core/src/main/java/hivemall/HivemallConstants.java +++ b/core/src/main/java/hivemall/HivemallConstants.java @@ -20,7 +20,7 @@ package hivemall; public final class HivemallConstants { - public static final String VERSION = "0.5.0-incubating-SNAPSHOT"; + public static final String VERSION = "0.5.1-incubating-SNAPSHOT"; public static final String BIAS_CLAUSE = "0"; public static final int BIAS_CLAUSE_HASHVAL = 0; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java b/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java index ab418ed..821c734 100644 --- a/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java +++ b/core/src/main/java/hivemall/ftvec/ranking/BprSamplingUDTF.java @@ -18,12 +18,6 @@ */ package hivemall.ftvec.ranking; -import hivemall.UDTFWithOptions; -import hivemall.utils.collections.lists.IntArrayList; -import hivemall.utils.hadoop.HiveUtils; -import hivemall.utils.lang.BitUtils; -import hivemall.utils.lang.Primitives; - import java.util.ArrayList; import java.util.BitSet; import java.util.Random; @@ -45,6 +39,12 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.io.IntWritable; +import hivemall.UDTFWithOptions; +import hivemall.utils.collections.lists.IntArrayList; +import hivemall.utils.hadoop.HiveUtils; +import hivemall.utils.lang.BitUtils; +import hivemall.utils.lang.Primitives; + @Description(name = "bpr_sampling", value = "_FUNC_(int userId, List<int> posItems [, const string options])" + "- Returns a relation consists of <int userId, int itemId>") @@ -54,9 +54,13 @@ public final class BprSamplingUDTF extends UDTFWithOptions { private ListObjectInspector itemListOI; private PrimitiveObjectInspector itemElemOI; - private PositiveOnlyFeedback feedback; + // Need to avoid + // org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.ArrayIndexOutOfBoundsException: 1 + @Nullable + private transient PositiveOnlyFeedback feedback; // sampling options + private int maxItemId; private float samplingRate; private boolean withoutReplacement; private boolean pairSampling; @@ -106,8 +110,7 @@ public final class BprSamplingUDTF extends UDTFWithOptions { } } - this.feedback = pairSampling ? new PerEventPositiveOnlyFeedback(maxItemId) - : new PositiveOnlyFeedback(maxItemId); + this.maxItemId = maxItemId; this.samplingRate = samplingRate; this.withoutReplacement = withoutReplacement; this.pairSampling = pairSampling; @@ -147,6 +150,11 @@ public final class BprSamplingUDTF extends UDTFWithOptions { @Override public void process(@Nonnull Object[] args) throws HiveException { + if (feedback == null) { + this.feedback = pairSampling ? 
new PerEventPositiveOnlyFeedback(maxItemId) + : new PositiveOnlyFeedback(maxItemId); + } + int userId = PrimitiveObjectInspectorUtils.getInt(args[0], userOI); validateIndex(userId); @@ -202,7 +210,8 @@ public final class BprSamplingUDTF extends UDTFWithOptions { } } - private void forward(final int user, final int posItem, final int negItem) throws HiveException { + private void forward(final int user, final int posItem, final int negItem) + throws HiveException { assert (user >= 0) : user; assert (posItem >= 0) : posItem; assert (negItem >= 0) : negItem; @@ -260,9 +269,8 @@ public final class BprSamplingUDTF extends UDTFWithOptions { * Caution: This is not a perfect 'without sampling' but it does 'without sampling' for positive * feedbacks. */ - private void uniformUserSamplingWithoutReplacement( - @Nonnull final PositiveOnlyFeedback feedback, final int numSamples) - throws HiveException { + private void uniformUserSamplingWithoutReplacement(@Nonnull final PositiveOnlyFeedback feedback, + final int numSamples) throws HiveException { int numUsers = feedback.getNumUsers(); if (numUsers == 0) { return; @@ -280,8 +288,8 @@ public final class BprSamplingUDTF extends UDTFWithOptions { int nthUser = rand.nextInt(numUsers); int user = BitUtils.indexOfSetBit(userBits, nthUser); if (user == -1) { - throw new HiveException("Cannot find " + nthUser + "-th user among " + numUsers - + " users"); + throw new HiveException( + "Cannot find " + nthUser + "-th user among " + numUsers + " users"); } IntArrayList posItems = feedback.getItems(user, true); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/smile/classification/DecisionTree.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/classification/DecisionTree.java b/core/src/main/java/hivemall/smile/classification/DecisionTree.java index f2ff560..e6160d2 100644 --- 
a/core/src/main/java/hivemall/smile/classification/DecisionTree.java +++ b/core/src/main/java/hivemall/smile/classification/DecisionTree.java @@ -1,22 +1,4 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* * Copyright (c) 2010 Haifeng Li * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,6 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +// This file includes a modified version of Smile: +// https://github.com/haifengl/smile/blob/master/core/src/main/java/smile/classification/DecisionTree.java package hivemall.smile.classification; import static hivemall.smile.utils.SmileExtUtils.resolveFeatureName; @@ -369,8 +353,8 @@ public final class DecisionTree implements Classifier<Vector> { public void exportGraphviz(@Nonnull final StringBuilder builder, @Nullable final String[] featureNames, @Nullable final String[] classNames, - @Nonnull final String outputName, @Nullable double[] colorBrew, - final @Nonnull MutableInt nodeIdGenerator, final int parentNodeId) { + @Nonnull final String outputName, @Nullable final double[] colorBrew, + @Nonnull final MutableInt nodeIdGenerator, final int parentNodeId) { final int myNodeId = nodeIdGenerator.getValue(); if (trueChild == null && falseChild == null) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/smile/regression/RegressionTree.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/regression/RegressionTree.java b/core/src/main/java/hivemall/smile/regression/RegressionTree.java index 0670876..b085734 100755 --- a/core/src/main/java/hivemall/smile/regression/RegressionTree.java +++ b/core/src/main/java/hivemall/smile/regression/RegressionTree.java @@ -1,22 +1,4 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* * Copyright (c) 2010 Haifeng Li * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,6 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +// This file includes a modified version of Smile: +// https://github.com/haifengl/smile/blob/master/core/src/main/java/smile/regression/RegressionTree.java package hivemall.smile.regression; import static hivemall.smile.utils.SmileExtUtils.resolveFeatureName; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java b/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java index 46b8758..ea3bc29 100644 --- a/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java +++ b/core/src/main/java/hivemall/smile/tools/TreePredictUDF.java @@ -18,6 +18,7 @@ */ package hivemall.smile.tools; +import hivemall.UDFWithOptions; import hivemall.math.vector.DenseVector; import hivemall.math.vector.SparseVector; import hivemall.math.vector.Vector; @@ -37,11 +38,12 @@ import java.util.List; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import 
org.apache.hadoop.hive.ql.udf.UDFType; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -53,12 +55,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspe import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -@Description( - name = "tree_predict", - value = "_FUNC_(string modelId, string model, array<double|string> features [, const boolean classification])" - + " - Returns a prediction result of a random forest") +@Description(name = "tree_predict", + value = "_FUNC_(string modelId, string model, array<double|string> features [, const string options | const boolean classification=false])" + + " - Returns a prediction result of a random forest" + + " in <int value, array<double> posteriori> for classification and <double> for regression") @UDFType(deterministic = true, stateful = false) -public final class TreePredictUDF extends GenericUDF { +public final class TreePredictUDF extends UDFWithOptions { private boolean classification; private StringObjectInspector modelOI; @@ -72,9 +74,25 @@ public final class TreePredictUDF extends GenericUDF { private transient Evaluator evaluator; @Override + protected Options getOptions() { + Options opts = new Options(); + opts.addOption("c", "classification", false, + "Predict as classification [default: not enabled]"); + return opts; + } + + @Override + protected CommandLine processOptions(@Nonnull String optionValue) throws UDFArgumentException { + CommandLine cl = parseOptions(optionValue); + + this.classification = cl.hasOption("classification"); + return cl; + } + + @Override public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length != 3 && argOIs.length != 4) { - throw new UDFArgumentException("_FUNC_ takes 3 or 4 
arguments"); + throw new UDFArgumentException("tree_predict takes 3 or 4 arguments"); } this.modelOI = HiveUtils.asStringOI(argOIs[1]); @@ -89,15 +107,25 @@ public final class TreePredictUDF extends GenericUDF { this.denseInput = false; } else { throw new UDFArgumentException( - "_FUNC_ takes array<double> or array<string> for the second argument: " + "tree_predict takes array<double> or array<string> for the second argument: " + listOI.getTypeName()); } - boolean classification = false; if (argOIs.length == 4) { - classification = HiveUtils.getConstBoolean(argOIs[3]); + ObjectInspector argOI3 = argOIs[3]; + if (HiveUtils.isConstBoolean(argOI3)) { + this.classification = HiveUtils.getConstBoolean(argOI3); + } else if (HiveUtils.isConstString(argOI3)) { + String opts = HiveUtils.getConstString(argOI3); + processOptions(opts); + } else { + throw new UDFArgumentException( + "tree_predict expects <const boolean> or <const string> for the fourth argument: " + + argOI3.getTypeName()); + } + } else { + this.classification = false; } - this.classification = classification; if (classification) { List<String> fieldNames = new ArrayList<String>(2); @@ -105,7 +133,8 @@ public final class TreePredictUDF extends GenericUDF { fieldNames.add("value"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); fieldNames.add("posteriori"); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } else { return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; @@ -116,7 +145,7 @@ public final class TreePredictUDF extends GenericUDF { public Object evaluate(@Nonnull DeferredObject[] arguments) throws HiveException { Object arg0 = arguments[0].get(); if (arg0 == 
null) { - throw new HiveException("ModelId was null"); + throw new HiveException("modelId should not be null"); } // Not using string OI for backward compatibilities String modelId = arg0.toString(); @@ -134,8 +163,8 @@ public final class TreePredictUDF extends GenericUDF { this.featuresProbe = parseFeatures(arg2, featuresProbe); if (evaluator == null) { - this.evaluator = classification ? new ClassificationEvaluator() - : new RegressionEvaluator(); + this.evaluator = + classification ? new ClassificationEvaluator() : new RegressionEvaluator(); } return evaluator.evaluate(modelId, model, featuresProbe); } @@ -192,8 +221,8 @@ public final class TreePredictUDF extends GenericUDF { } if (feature.indexOf(':') != -1) { - throw new UDFArgumentException("Invaliad feature format `<index>:<value>`: " - + col); + throw new UDFArgumentException( + "Invaliad feature format `<index>:<value>`: " + col); } final int colIndex = Integer.parseInt(feature); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/utils/codec/Base91.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/codec/Base91.java b/core/src/main/java/hivemall/utils/codec/Base91.java index 27bdf62..3e996be 100644 --- a/core/src/main/java/hivemall/utils/codec/Base91.java +++ b/core/src/main/java/hivemall/utils/codec/Base91.java @@ -1,22 +1,4 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* * basE91 encoding/decoding routines * * Copyright (c) 2000-2006 Joachim Henke @@ -46,6 +28,8 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ +// This file contains a modified version of Jochaim Henke's Base91: +// https://github.com/bwaldvogel/base91/blob/master/src/main/java/de/bwaldvogel/base91/Base91.java package hivemall.utils.codec; import hivemall.utils.io.FastByteArrayOutputStream; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java b/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java new file mode 100644 index 0000000..b69c5b0 --- /dev/null +++ b/core/src/main/java/hivemall/utils/lang/ExceptionUtils.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.lang; + +import javax.annotation.Nonnull; + +public final class ExceptionUtils { + + public static final int TRACE_CAUSE_DEPTH = 5; + + private ExceptionUtils() {} + + @Nonnull + public static String prettyPrintStackTrace(@Nonnull final Throwable throwable) { + return prettyPrintStackTrace(throwable, TRACE_CAUSE_DEPTH); + } + + @Nonnull + public static String prettyPrintStackTrace(@Nonnull final Throwable throwable, + final int traceDepth) { + final StringBuilder out = new StringBuilder(512); + out.append(getMessage(throwable)); + out.append("\n\n---- Debugging information ----"); + final int tracedepth; + if (throwable instanceof RuntimeException || throwable instanceof Error) { + tracedepth = -1; + } else { + tracedepth = traceDepth; + } + String captured = captureThrownWithStrackTrace(throwable, "trace-exception", tracedepth); + out.append(captured); + final Throwable cause = throwable.getCause(); + if (cause != null) { + final Throwable rootCause = getRootCause(cause); + captured = captureThrownWithStrackTrace(rootCause, "trace-cause", TRACE_CAUSE_DEPTH); + out.append(captured); + } + out.append("\n------------------------------- \n"); + return out.toString(); + } + + @Nonnull + private static String captureThrownWithStrackTrace(@Nonnull final Throwable throwable, + final String label, final int traceDepth) { + assert (traceDepth >= 1 || traceDepth == -1); + final StringBuilder out = new StringBuilder(255); + final String clazz = throwable.getClass().getName(); + out.append(String.format("\n%-20s: %s \n", ("* " + 
label), clazz)); + final StackTraceElement[] st = throwable.getStackTrace(); + int at; + final int limit = (traceDepth == -1) ? st.length - 1 : traceDepth; + for (at = 0; at < st.length; at++) { + if (at < limit) { + out.append("\tat " + st[at] + '\n'); + } else { + out.append("\t...\n"); + break; + } + } + if (st.length == 0) { + out.append("\t no stack traces..."); + } else if (at != (st.length - 1)) { + out.append("\tat " + st[st.length - 1]); + } + String errmsg = throwable.getMessage(); + if (errmsg != null) { + out.append(String.format("\n%-20s: \n", ("* " + label + "-error-msg"))); + String[] line = errmsg.split("\n"); + final int maxlines = Math.min(line.length, Math.max(1, TRACE_CAUSE_DEPTH - 2)); + for (int i = 0; i < maxlines; i++) { + out.append('\t'); + out.append(line[i]); + if (i != (maxlines - 1)) { + out.append('\n'); + } + } + } + return out.toString(); + } + + @Nonnull + public static String getMessage(@Nonnull final Throwable throwable) { + String errMsg = throwable.getMessage(); + String clazz = throwable.getClass().getName(); + return (errMsg != null) ? clazz + ": " + errMsg : clazz; + } + + @Nonnull + private static Throwable getRootCause(@Nonnull final Throwable throwable) { + Throwable top = throwable; + while (top != null) { + Throwable parent = top.getCause(); + if (parent != null) { + top = parent; + } else { + break; + } + } + return top; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/java/hivemall/utils/math/FastMath.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/math/FastMath.java b/core/src/main/java/hivemall/utils/math/FastMath.java index d27d6f8..09f7a16 100644 --- a/core/src/main/java/hivemall/utils/math/FastMath.java +++ b/core/src/main/java/hivemall/utils/math/FastMath.java @@ -1,21 +1,32 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Copyright 2012-2015 Jeff Hain * - * http://www.apache.org/licenses/LICENSE-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ============================================================================= + * Notice of fdlibm package this program is partially derived from: + * + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ============================================================================= */ +// This file contains a modified version of Jafama's FastMath: +// https://github.com/jeffhain/jafama/blob/master/src/main/java/net/jafama/FastMath.java package hivemall.utils.math; import hivemall.annotations.Experimental; @@ -98,34 +109,6 @@ public final class FastMath { return 1 / (1 + exp(-x)); } - /* - * Copyright 2012-2015 Jeff Hain - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - /* - * ============================================================================= - * Notice of fdlibm package this program is partially derived from: - * - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ============================================================================= - */ - /** * Based on Jafama (https://github.com/jeffhain/jafama/) version 2.2. 
*/ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/resources/META-INF/LICENSE-jafama.txt ---------------------------------------------------------------------- diff --git a/core/src/main/resources/META-INF/LICENSE-jafama.txt b/core/src/main/resources/META-INF/LICENSE-jafama.txt new file mode 100644 index 0000000..151b7ea --- /dev/null +++ b/core/src/main/resources/META-INF/LICENSE-jafama.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/core/src/main/resources/META-INF/LICENSE-smile.txt ---------------------------------------------------------------------- diff --git a/core/src/main/resources/META-INF/LICENSE-smile.txt b/core/src/main/resources/META-INF/LICENSE-smile.txt new file mode 100644 index 0000000..94ad231 --- /dev/null +++ b/core/src/main/resources/META-INF/LICENSE-smile.txt @@ -0,0 +1,203 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/dist/pom.xml ---------------------------------------------------------------------- diff --git a/dist/pom.xml b/dist/pom.xml new file mode 100644 index 0000000..bea6226 --- /dev/null +++ b/dist/pom.xml @@ -0,0 +1,163 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.hivemall</groupId> + <artifactId>hivemall</artifactId> + <version>0.5.1-incubating-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <artifactId>hivemall-all</artifactId> + <name>Hivemall Distribution</name> + <packaging>jar</packaging> + + <properties> + <main.basedir>${project.parent.basedir}</main.basedir> + </properties> + + <dependencies> + <!-- compile scope --> + <dependency> + <groupId>org.apache.hivemall</groupId> + <artifactId>hivemall-core</artifactId> + <version>${project.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>org.apache.hivemall</groupId> + <artifactId>hivemall-nlp</artifactId> + <version>${project.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>org.apache.hivemall</groupId> + <artifactId>hivemall-xgboost</artifactId> + <version>${project.version}</version> + <scope>compile</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <artifactId>maven-jar-plugin</artifactId> + <configuration> + <finalName>${project.artifactId}-${project.version}</finalName> + <archive> + <index>true</index> + <compress>true</compress> + <manifest> + <addClasspath>false</addClasspath> + <addDefaultImplementationEntries>true</addDefaultImplementationEntries> + <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries> + </manifest> + </archive> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <executions> + <execution> + <id>jar-with-dependencies</id> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + 
<finalName>${project.artifactId}-${project.version}</finalName> + <outputDirectory>${project.parent.build.directory}</outputDirectory> + <minimizeJar>false</minimizeJar> + <createDependencyReducedPom>false</createDependencyReducedPom> + <createSourcesJar>true</createSourcesJar> + <promoteTransitiveDependencies>true</promoteTransitiveDependencies> + <artifactSet> + <includes> + <!-- hivemall-core --> + <include>org.apache.hivemall:hivemall-core</include> + <include>io.netty:netty-all</include> + <include>com.github.haifengl:smile-core</include> + <include>com.github.haifengl:smile-math</include> + <include>com.github.haifengl:smile-data</include> + <include>org.tukaani:xz</include> + <include>org.apache.commons:commons-math3</include> + <include>org.roaringbitmap:RoaringBitmap</include> + <include>it.unimi.dsi:fastutil</include> + <include>com.clearspring.analytics:stream</include> + <!-- hivemall-nlp --> + <include>org.apache.hivemall:hivemall-nlp</include> + <include>org.apache.lucene:lucene-analyzers-kuromoji</include> + <include>org.apache.lucene:lucene-analyzers-smartcn</include> + <include>org.apache.lucene:lucene-analyzers-common</include> + <include>org.apache.lucene:lucene-core</include> + <!-- hivemall-xgboost --> + <include>org.apache.hivemall:hivemall-xgboost</include> + <include>io.github.myui:xgboost4j</include> + <include>com.esotericsoftware.kryo:kryo</include> + </includes> + <excludes> + <exclude>org.apache.hivemall:hivemall-all</exclude> + </excludes> + </artifactSet> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> + <manifestEntries> + <Implementation-Title>${project.name}</Implementation-Title> + <Implementation-Version>${project.version}</Implementation-Version> + <Implementation-Vendor>${project.organization.name}</Implementation-Vendor> + </manifestEntries> + </transformer> + </transformers> + <filters> + <filter> + <artifact>org.apache.lucene:*</artifact> + <includes> + 
<include>**</include> + </includes> + </filter> + <filter> + <artifact>com.esotericsoftware.kryo:kryo</artifact> + <includes> + <include>**</include> + </includes> + </filter> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/LICENSE.txt</exclude> + <exclude>META-INF/NOTICE.txt</exclude> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + <exclude>*.jar</exclude> + <exclude>tracker.py</exclude> + </excludes> + </filter> + </filters> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + +</project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/docs/gitbook/binaryclass/news20_rf.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/news20_rf.md b/docs/gitbook/binaryclass/news20_rf.md index fd0b475..327939b 100644 --- a/docs/gitbook/binaryclass/news20_rf.md +++ b/docs/gitbook/binaryclass/news20_rf.md @@ -47,7 +47,7 @@ from ## Prediction ```sql -SET hivevar:classification=true; +-- SET hivevar:classification=true; drop table rf_predicted; create table rf_predicted @@ -60,7 +60,8 @@ FROM ( SELECT rowid, m.model_weight, - tree_predict(m.model_id, m.model, t.features, ${classification}) as predicted + tree_predict(m.model_id, m.model, t.features, "-classification") as predicted + -- tree_predict(m.model_id, m.model, t.features, ${classification}) as predicted FROM rf_model m LEFT OUTER JOIN -- CROSS JOIN
