2.2 and spark-common as the submodule of hivemall-spark

myui Mon, 22 Jan 2018 03:03:07 -0800

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-2.0/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.0/pom.xml b/spark/spark-2.0/pom.xml
index fdaccd5..3d82f89 100644
--- a/spark/spark-2.0/pom.xml
+++ b/spark/spark-2.0/pom.xml
@@ -22,31 +22,31 @@
 
        <parent>
                <groupId>org.apache.hivemall</groupId>
-               <artifactId>hivemall</artifactId>
+               <artifactId>hivemall-spark</artifactId>
                <version>0.5.1-incubating-SNAPSHOT</version>
-               <relativePath>../../pom.xml</relativePath>
+               <relativePath>../pom.xml</relativePath>
        </parent>
 
-       <artifactId>hivemall-spark</artifactId>
+       <artifactId>hivemall-spark_2.0</artifactId>
        <name>Hivemall on Spark 2.0</name>
        <packaging>jar</packaging>
 
        <properties>
-               <main.basedir>${project.parent.basedir}</main.basedir>
+               <main.basedir>${project.parent.parent.basedir}</main.basedir>
+               <spark.version>2.0.2</spark.version>
+               <spark.binary.version>2.0</spark.binary.version>
        </properties>
 
        <dependencies>
-               <!-- hivemall dependencies -->
+               <!-- compile scope -->
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-core</artifactId>
-                       <version>${project.version}</version>
                        <scope>compile</scope>
                </dependency>
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-xgboost</artifactId>
-                       <version>${project.version}</version>
                        <scope>compile</scope>
                </dependency>
                <dependency>
@@ -55,22 +55,18 @@
                        <version>${project.version}</version>
                        <scope>compile</scope>
                </dependency>
-
-               <!-- third-party dependencies -->
-               <dependency>
-                       <groupId>org.scala-lang</groupId>
-                       <artifactId>scala-library</artifactId>
-                       <version>${scala.version}</version>
-                       <scope>compile</scope>
-               </dependency>
                <dependency>
                        <groupId>org.apache.commons</groupId>
                        <artifactId>commons-compress</artifactId>
-                       <version>1.8</version>
                        <scope>compile</scope>
                </dependency>
 
-               <!-- other provided dependencies -->
+               <!-- provided scope -->
+               <dependency>
+                       <groupId>org.scala-lang</groupId>
+                       <artifactId>scala-library</artifactId>
+                       <scope>provided</scope>
+               </dependency>
                <dependency>
                        <groupId>org.apache.spark</groupId>
                        
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -106,81 +102,26 @@
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-mixserv</artifactId>
-                       <version>${project.version}</version>
                        <scope>test</scope>
                </dependency>
                <dependency>
                        <groupId>org.xerial</groupId>
                        <artifactId>xerial-core</artifactId>
-                       <version>3.2.3</version>
                        <scope>test</scope>
                </dependency>
                <dependency>
                        <groupId>org.scalatest</groupId>
                        
<artifactId>scalatest_${scala.binary.version}</artifactId>
-                       <version>2.2.4</version>
                        <scope>test</scope>
                </dependency>
        </dependencies>
 
        <build>
-               <directory>target</directory>
-               <outputDirectory>target/classes</outputDirectory>
-               
<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-               <testOutputDirectory>target/test-classes</testOutputDirectory>
                <plugins>
-                       <!-- For incremental compilation -->
-                       <plugin>
-                               <groupId>net.alchim31.maven</groupId>
-                               <artifactId>scala-maven-plugin</artifactId>
-                               <version>3.2.2</version>
-                               <executions>
-                                       <execution>
-                                               <id>scala-compile-first</id>
-                                               <phase>process-resources</phase>
-                                               <goals>
-                                                       <goal>compile</goal>
-                                               </goals>
-                                       </execution>
-                                       <execution>
-                                               
<id>scala-test-compile-first</id>
-                                               
<phase>process-test-resources</phase>
-                                               <goals>
-                                                       <goal>testCompile</goal>
-                                               </goals>
-                                       </execution>
-                               </executions>
-                               <configuration>
-                                       
<scalaVersion>${scala.version}</scalaVersion>
-                                       
<recompileMode>incremental</recompileMode>
-                                       <useZincServer>true</useZincServer>
-                                       <args>
-                                               <arg>-unchecked</arg>
-                                               <arg>-deprecation</arg>
-                                               <!-- TODO: To enable this 
option, we need to fix many wornings -->
-                                               <!-- <arg>-feature</arg> -->
-                                       </args>
-                                       <jvmArgs>
-                                               <jvmArg>-Xms512m</jvmArg>
-                                               <jvmArg>-Xmx1024m</jvmArg>
-                                       </jvmArgs>
-                               </configuration>
-                       </plugin>
-                       <!-- hivemall-spark_xx-xx.jar -->
-                       <plugin>
-                               <groupId>org.apache.maven.plugins</groupId>
-                               <artifactId>maven-jar-plugin</artifactId>
-                               <version>2.5</version>
-                               <configuration>
-                                       
<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-                                       
<outputDirectory>${project.parent.build.directory}</outputDirectory>
-                               </configuration>
-                       </plugin>
                        <!-- hivemall-spark_xx-xx-with-dependencies.jar 
including minimum dependencies -->
                        <plugin>
                                <groupId>org.apache.maven.plugins</groupId>
                                <artifactId>maven-shade-plugin</artifactId>
-                               <version>3.1.0</version>
                                <executions>
                                        <execution>
                                                <id>jar-with-dependencies</id>
@@ -189,8 +130,8 @@
                                                        <goal>shade</goal>
                                                </goals>
                                                <configuration>
-                                                       
<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
-                                                       
<outputDirectory>${project.parent.build.directory}</outputDirectory>
+                                                       
<finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
+                                                       
<outputDirectory>${project.parent.parent.build.directory}</outputDirectory>
                                                        
<minimizeJar>false</minimizeJar>
                                                        
<createDependencyReducedPom>false</createDependencyReducedPom>
                                                        <artifactSet>
@@ -213,7 +154,6 @@
                        <plugin>
                                <groupId>org.apache.maven.plugins</groupId>
                                <artifactId>maven-surefire-plugin</artifactId>
-                               <version>2.7</version>
                                <configuration>
                                        <skipTests>true</skipTests>
                                </configuration>
@@ -234,6 +174,7 @@
                                                
<SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
                                                <SPARK_TESTING>1</SPARK_TESTING>
                                                
<JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
+                                               
<PATH>${env.JAVA_HOME}/bin:${env.PATH}</PATH>
                                        </environmentVariables>
                                        <systemProperties>
                                                
<log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git 
a/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
 
b/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ 
b/spark/spark-2.0/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+  def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: 
SQLContext)
+      : DStream[Row] = {
+    ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+      f(sqlContext.createDataFrame(rdd)).rdd
+    }
+  }
+}
+
+object HivemallStreamingOps {
+
+  /**
+   * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+   */
+  implicit def dataFrameToHivemallStreamingOps(ds: 
DStream[HivemallLabeledPoint])
+      : HivemallStreamingOps = {
+    new HivemallStreamingOps(ds)
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-2.1/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.1/pom.xml b/spark/spark-2.1/pom.xml
index 0f127a7..e1da779 100644
--- a/spark/spark-2.1/pom.xml
+++ b/spark/spark-2.1/pom.xml
@@ -22,17 +22,19 @@
 
        <parent>
                <groupId>org.apache.hivemall</groupId>
-               <artifactId>hivemall</artifactId>
+               <artifactId>hivemall-spark</artifactId>
                <version>0.5.1-incubating-SNAPSHOT</version>
-               <relativePath>../../pom.xml</relativePath>
+               <relativePath>../pom.xml</relativePath>
        </parent>
 
-       <artifactId>hivemall-spark</artifactId>
+       <artifactId>hivemall-spark_2.1</artifactId>
        <name>Hivemall on Spark 2.1</name>
        <packaging>jar</packaging>
 
        <properties>
-               <main.basedir>${project.parent.basedir}</main.basedir>
+               <main.basedir>${project.parent.parent.basedir}</main.basedir>
+               <spark.version>2.1.1</spark.version>
+               <spark.binary.version>2.1</spark.binary.version>
        </properties>
 
        <dependencies>
@@ -40,13 +42,11 @@
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-core</artifactId>
-                       <version>${project.version}</version>
                        <scope>compile</scope>
                </dependency>
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-xgboost</artifactId>
-                       <version>${project.version}</version>
                        <scope>compile</scope>
                </dependency>
                <dependency>
@@ -55,22 +55,18 @@
                        <version>${project.version}</version>
                        <scope>compile</scope>
                </dependency>
-
-               <!-- third-party dependencies -->
-               <dependency>
-                       <groupId>org.scala-lang</groupId>
-                       <artifactId>scala-library</artifactId>
-                       <version>${scala.version}</version>
-                       <scope>compile</scope>
-               </dependency>
                <dependency>
                        <groupId>org.apache.commons</groupId>
                        <artifactId>commons-compress</artifactId>
-                       <version>1.8</version>
                        <scope>compile</scope>
                </dependency>
 
-               <!-- other provided dependencies -->
+               <!-- provided scope -->
+               <dependency>
+                       <groupId>org.scala-lang</groupId>
+                       <artifactId>scala-library</artifactId>
+                       <scope>provided</scope>
+               </dependency>
                <dependency>
                        <groupId>org.apache.spark</groupId>
                        
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -106,81 +102,26 @@
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-mixserv</artifactId>
-                       <version>${project.version}</version>
                        <scope>test</scope>
                </dependency>
                <dependency>
                        <groupId>org.xerial</groupId>
                        <artifactId>xerial-core</artifactId>
-                       <version>3.2.3</version>
                        <scope>test</scope>
                </dependency>
                <dependency>
                        <groupId>org.scalatest</groupId>
                        
<artifactId>scalatest_${scala.binary.version}</artifactId>
-                       <version>2.2.4</version>
                        <scope>test</scope>
                </dependency>
        </dependencies>
 
        <build>
-               <directory>target</directory>
-               <outputDirectory>target/classes</outputDirectory>
-               
<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-               <testOutputDirectory>target/test-classes</testOutputDirectory>
                <plugins>
-                       <!-- For incremental compilation -->
-                       <plugin>
-                               <groupId>net.alchim31.maven</groupId>
-                               <artifactId>scala-maven-plugin</artifactId>
-                               <version>3.2.2</version>
-                               <executions>
-                                       <execution>
-                                               <id>scala-compile-first</id>
-                                               <phase>process-resources</phase>
-                                               <goals>
-                                                       <goal>compile</goal>
-                                               </goals>
-                                       </execution>
-                                       <execution>
-                                               
<id>scala-test-compile-first</id>
-                                               
<phase>process-test-resources</phase>
-                                               <goals>
-                                                       <goal>testCompile</goal>
-                                               </goals>
-                                       </execution>
-                               </executions>
-                               <configuration>
-                                       
<scalaVersion>${scala.version}</scalaVersion>
-                                       
<recompileMode>incremental</recompileMode>
-                                       <useZincServer>true</useZincServer>
-                                       <args>
-                                               <arg>-unchecked</arg>
-                                               <arg>-deprecation</arg>
-                                               <!-- TODO: To enable this 
option, we need to fix many wornings -->
-                                               <!-- <arg>-feature</arg> -->
-                                       </args>
-                                       <jvmArgs>
-                                               <jvmArg>-Xms512m</jvmArg>
-                                               <jvmArg>-Xmx1024m</jvmArg>
-                                       </jvmArgs>
-                               </configuration>
-                       </plugin>
-                       <!-- hivemall-spark_xx-xx.jar -->
-                       <plugin>
-                               <groupId>org.apache.maven.plugins</groupId>
-                               <artifactId>maven-jar-plugin</artifactId>
-                               <version>2.5</version>
-                               <configuration>
-                                       
<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-                                       
<outputDirectory>${project.parent.build.directory}</outputDirectory>
-                               </configuration>
-                       </plugin>
                        <!-- hivemall-spark_xx-xx-with-dependencies.jar 
including minimum dependencies -->
                        <plugin>
                                <groupId>org.apache.maven.plugins</groupId>
                                <artifactId>maven-shade-plugin</artifactId>
-                               <version>3.1.0</version>
                                <executions>
                                        <execution>
                                                <id>jar-with-dependencies</id>
@@ -189,8 +130,8 @@
                                                        <goal>shade</goal>
                                                </goals>
                                                <configuration>
-                                                       
<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
-                                                       
<outputDirectory>${project.parent.build.directory}</outputDirectory>
+                                                       
<finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
+                                                       
<outputDirectory>${project.parent.parent.build.directory}</outputDirectory>
                                                        
<minimizeJar>false</minimizeJar>
                                                        
<createDependencyReducedPom>false</createDependencyReducedPom>
                                                        <artifactSet>
@@ -213,7 +154,6 @@
                        <plugin>
                                <groupId>org.apache.maven.plugins</groupId>
                                <artifactId>maven-surefire-plugin</artifactId>
-                               <version>2.7</version>
                                <configuration>
                                        <skipTests>true</skipTests>
                                </configuration>
@@ -234,6 +174,7 @@
                                                
<SPARK_SCALA_VERSION>${scala.binary.version}</SPARK_SCALA_VERSION>
                                                <SPARK_TESTING>1</SPARK_TESTING>
                                                
<JAVA_HOME>${env.JAVA_HOME}</JAVA_HOME>
+                                               
<PATH>${env.JAVA_HOME}/bin:${env.PATH}</PATH>
                                        </environmentVariables>
                                        <systemProperties>
                                                
<log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
----------------------------------------------------------------------
diff --git 
a/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
 
b/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
new file mode 100644
index 0000000..a6bbb4b
--- /dev/null
+++ 
b/spark/spark-2.1/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.spark.streaming
+
+import scala.reflect.ClassTag
+
+import org.apache.spark.ml.feature.HivemallLabeledPoint
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.streaming.dstream.DStream
+
+final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) {
+
+  def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: 
SQLContext)
+      : DStream[Row] = {
+    ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] =>
+      f(sqlContext.createDataFrame(rdd)).rdd
+    }
+  }
+}
+
+object HivemallStreamingOps {
+
+  /**
+   * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]].
+   */
+  implicit def dataFrameToHivemallStreamingOps(ds: 
DStream[HivemallLabeledPoint])
+      : HivemallStreamingOps = {
+    new HivemallStreamingOps(ds)
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-2.2/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/pom.xml b/spark/spark-2.2/pom.xml
index 8a5ed00..1d9e3c8 100644
--- a/spark/spark-2.2/pom.xml
+++ b/spark/spark-2.2/pom.xml
@@ -22,34 +22,35 @@
 
        <parent>
                <groupId>org.apache.hivemall</groupId>
-               <artifactId>hivemall</artifactId>
+               <artifactId>hivemall-spark</artifactId>
                <version>0.5.1-incubating-SNAPSHOT</version>
-               <relativePath>../../pom.xml</relativePath>
+               <relativePath>../pom.xml</relativePath>
        </parent>
 
-       <artifactId>hivemall-spark</artifactId>
+       <artifactId>hivemall-spark_2.2</artifactId>
        <name>Hivemall on Spark 2.2</name>
        <packaging>jar</packaging>
 
        <properties>
-               <PermGen>64m</PermGen>
-               <MaxPermGen>512m</MaxPermGen>
-               <CodeCacheSize>512m</CodeCacheSize>
-               <main.basedir>${project.parent.basedir}</main.basedir>
+               <main.basedir>${project.parent.parent.basedir}</main.basedir>
+               <spark.version>2.2.0</spark.version>
+               <spark.binary.version>2.2</spark.binary.version>
+               <java.source.version>1.8</java.source.version>
+               <java.target.version>1.8</java.target.version>
+               <maven.compiler.source>1.8</maven.compiler.source>
+               <maven.compiler.target>1.8</maven.compiler.target>
        </properties>
 
        <dependencies>
-               <!-- hivemall dependencies -->
+               <!-- compile scope -->
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-core</artifactId>
-                       <version>${project.version}</version>
                        <scope>compile</scope>
                </dependency>
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-xgboost</artifactId>
-                       <version>${project.version}</version>
                        <scope>compile</scope>
                </dependency>
                <dependency>
@@ -58,22 +59,18 @@
                        <version>${project.version}</version>
                        <scope>compile</scope>
                </dependency>
-
-               <!-- third-party dependencies -->
-               <dependency>
-                       <groupId>org.scala-lang</groupId>
-                       <artifactId>scala-library</artifactId>
-                       <version>${scala.version}</version>
-                       <scope>compile</scope>
-               </dependency>
                <dependency>
                        <groupId>org.apache.commons</groupId>
                        <artifactId>commons-compress</artifactId>
-                       <version>1.8</version>
                        <scope>compile</scope>
                </dependency>
 
-               <!-- other provided dependencies -->
+               <!-- provided scope -->
+               <dependency>
+                       <groupId>org.scala-lang</groupId>
+                       <artifactId>scala-library</artifactId>
+                       <scope>provided</scope>
+               </dependency>
                <dependency>
                        <groupId>org.apache.spark</groupId>
                        
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -109,84 +106,26 @@
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-mixserv</artifactId>
-                       <version>${project.version}</version>
                        <scope>test</scope>
                </dependency>
                <dependency>
                        <groupId>org.xerial</groupId>
                        <artifactId>xerial-core</artifactId>
-                       <version>3.2.3</version>
                        <scope>test</scope>
                </dependency>
                <dependency>
                        <groupId>org.scalatest</groupId>
                        
<artifactId>scalatest_${scala.binary.version}</artifactId>
-                       <version>2.2.4</version>
                        <scope>test</scope>
                </dependency>
        </dependencies>
 
        <build>
-               <directory>target</directory>
-               <outputDirectory>target/classes</outputDirectory>
-               
<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-               <testOutputDirectory>target/test-classes</testOutputDirectory>
                <plugins>
-                       <!-- For incremental compilation -->
-                       <plugin>
-                               <groupId>net.alchim31.maven</groupId>
-                               <artifactId>scala-maven-plugin</artifactId>
-                               <version>3.2.2</version>
-                               <executions>
-                                       <execution>
-                                               <id>scala-compile-first</id>
-                                               <phase>process-resources</phase>
-                                               <goals>
-                                                       <goal>compile</goal>
-                                               </goals>
-                                       </execution>
-                                       <execution>
-                                               
<id>scala-test-compile-first</id>
-                                               
<phase>process-test-resources</phase>
-                                               <goals>
-                                                       <goal>testCompile</goal>
-                                               </goals>
-                                       </execution>
-                               </executions>
-                               <configuration>
-                                       
<scalaVersion>${scala.version}</scalaVersion>
-                                       
<recompileMode>incremental</recompileMode>
-                                       <useZincServer>true</useZincServer>
-                                       <args>
-                                               <arg>-unchecked</arg>
-                                               <arg>-deprecation</arg>
-                                               <!-- TODO: To enable this 
option, we need to fix many wornings -->
-                                               <!-- <arg>-feature</arg> -->
-                                       </args>
-                                       <jvmArgs>
-                                               <jvmArg>-Xms1024m</jvmArg>
-                                               <jvmArg>-Xmx1024m</jvmArg>
-                                               
<jvmArg>-XX:PermSize=${PermGen}</jvmArg>
-                                               
<jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg>
-                                               
<jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg>
-                                       </jvmArgs>
-                               </configuration>
-                       </plugin>
-                       <!-- hivemall-spark_xx-xx.jar -->
-                       <plugin>
-                               <groupId>org.apache.maven.plugins</groupId>
-                               <artifactId>maven-jar-plugin</artifactId>
-                               <version>2.5</version>
-                               <configuration>
-                                       
<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}</finalName>
-                                       
<outputDirectory>${project.parent.build.directory}</outputDirectory>
-                               </configuration>
-                       </plugin>
                        <!-- hivemall-spark_xx-xx-with-dependencies.jar 
including minimum dependencies -->
                        <plugin>
                                <groupId>org.apache.maven.plugins</groupId>
                                <artifactId>maven-shade-plugin</artifactId>
-                               <version>3.1.0</version>
                                <executions>
                                        <execution>
                                                <id>jar-with-dependencies</id>
@@ -195,8 +134,8 @@
                                                        <goal>shade</goal>
                                                </goals>
                                                <configuration>
-                                                       
<finalName>${project.artifactId}-${spark.binary.version}_${scala.binary.version}-${project.version}-with-dependencies</finalName>
-                                                       
<outputDirectory>${project.parent.build.directory}</outputDirectory>
+                                                       
<finalName>${project.artifactId}-${project.version}-with-dependencies</finalName>
+                                                       
<outputDirectory>${project.parent.parent.build.directory}</outputDirectory>
                                                        
<minimizeJar>false</minimizeJar>
                                                        
<createDependencyReducedPom>false</createDependencyReducedPom>
                                                        <artifactSet>
@@ -219,7 +158,6 @@
                        <plugin>
                                <groupId>org.apache.maven.plugins</groupId>
                                <artifactId>maven-surefire-plugin</artifactId>
-                               <version>2.7</version>
                                <configuration>
                                        <skipTests>true</skipTests>
                                </configuration>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-common/pom.xml b/spark/spark-common/pom.xml
deleted file mode 100644
index 0665c11..0000000
--- a/spark/spark-common/pom.xml
+++ /dev/null
@@ -1,146 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"; 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance";
-       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd";>
-       <modelVersion>4.0.0</modelVersion>
-
-       <parent>
-               <groupId>org.apache.hivemall</groupId>
-               <artifactId>hivemall</artifactId>
-               <version>0.5.1-incubating-SNAPSHOT</version>
-               <relativePath>../../pom.xml</relativePath>
-       </parent>
-
-       <artifactId>hivemall-spark-common</artifactId>
-       <name>Hivemall on Spark Common</name>
-       <packaging>jar</packaging>
-
-       <properties>
-               <main.basedir>${project.parent.basedir}</main.basedir>
-       </properties>
-
-       <dependencies>
-               <!-- hivemall dependencies -->
-               <dependency>
-                       <groupId>org.apache.hivemall</groupId>
-                       <artifactId>hivemall-core</artifactId>
-                       <version>${project.version}</version>
-                       <scope>compile</scope>
-               </dependency>
-
-               <!-- other provided dependencies -->
-               <dependency>
-                       <groupId>org.apache.spark</groupId>
-                       
<artifactId>spark-sql_${scala.binary.version}</artifactId>
-                       <version>${spark.version}</version>
-                       <scope>provided</scope>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.spark</groupId>
-                       
<artifactId>spark-hive_${scala.binary.version}</artifactId>
-                       <version>${spark.version}</version>
-                       <scope>provided</scope>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.spark</groupId>
-                       
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
-                       <version>${spark.version}</version>
-                       <scope>provided</scope>
-               </dependency>
-
-               <dependency>
-                       <groupId>org.apache.hadoop</groupId>
-                       <artifactId>hadoop-common</artifactId>
-                       <version>${hadoop.version}</version>
-                       <scope>provided</scope>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.hadoop</groupId>
-                       <artifactId>hadoop-mapreduce-client-core</artifactId>
-                       <version>${hadoop.version}</version>
-                       <scope>provided</scope>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.hive</groupId>
-                       <artifactId>hive-exec</artifactId>
-                       <version>${hive.version}</version>
-                       <scope>provided</scope>
-               </dependency>
-       </dependencies>
-
-       <build>
-               <directory>target</directory>
-               <outputDirectory>target/classes</outputDirectory>
-               <finalName>${project.artifactId}-${project.version}</finalName>
-               <testOutputDirectory>target/test-classes</testOutputDirectory>
-               <plugins>
-                       <!-- For resolving spark binary incompatibility -->
-                       <plugin>
-                               <artifactId>maven-clean-plugin</artifactId>
-                               <version>3.0.0</version>
-                               <executions>
-                                       <execution>
-                                               <phase>initialize</phase>
-                                               <goals>
-                                                       <goal>clean</goal>
-                                               </goals>
-                                       </execution>
-                               </executions>
-                       </plugin>
-                       <!-- For incremental compilation -->
-                       <plugin>
-                               <groupId>net.alchim31.maven</groupId>
-                               <artifactId>scala-maven-plugin</artifactId>
-                               <version>3.2.2</version>
-                               <executions>
-                                       <execution>
-                                               <id>scala-compile-first</id>
-                                               <phase>process-resources</phase>
-                                               <goals>
-                                                       <goal>compile</goal>
-                                               </goals>
-                                       </execution>
-                                       <execution>
-                                               
<id>scala-test-compile-first</id>
-                                               
<phase>process-test-resources</phase>
-                                               <goals>
-                                                       <goal>testCompile</goal>
-                                               </goals>
-                                       </execution>
-                               </executions>
-                               <configuration>
-                                       
<scalaVersion>${scala.version}</scalaVersion>
-                                       
<recompileMode>incremental</recompileMode>
-                                       <useZincServer>true</useZincServer>
-                                       <args>
-                                               <arg>-unchecked</arg>
-                                               <arg>-deprecation</arg>
-                                               <!-- TODO: To enable this 
option, we need to fix many wornings -->
-                                               <!-- <arg>-feature</arg> -->
-                                       </args>
-                                       <jvmArgs>
-                                               <jvmArg>-Xms512m</jvmArg>
-                                               <jvmArg>-Xmx1024m</jvmArg>
-                                       </jvmArgs>
-                               </configuration>
-                       </plugin>
-               </plugins>
-       </build>
-</project>
-

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/scalastyle-config.xml
----------------------------------------------------------------------
diff --git a/spark/spark-common/scalastyle-config.xml 
b/spark/spark-common/scalastyle-config.xml
deleted file mode 100644
index 13d1c47..0000000
--- a/spark/spark-common/scalastyle-config.xml
+++ /dev/null
@@ -1,333 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<!--
-If you wish to turn off checking for a section of code, you can put a comment 
in the source
-before and after the section, with the following syntax:
-
-  // scalastyle:off
-  ...  // stuff that breaks the styles
-  // scalastyle:on
-
-You can also disable only one rule, by specifying its rule id, as specified in:
-  http://www.scalastyle.org/rules-0.7.0.html
-
-  // scalastyle:off no.finalize
-  override def finalize(): Unit = ...
-  // scalastyle:on no.finalize
-
-This file is divided into 3 sections:
- (1) rules that we enforce.
- (2) rules that we would like to enforce, but haven't cleaned up the codebase 
to turn on yet
-     (or we need to make the scalastyle rule more configurable).
- (3) rules that we don't want to enforce.
--->
-
-<scalastyle>
-  <name>Scalastyle standard configuration</name>
-
-  <!-- 
================================================================================
 -->
-  <!--                               rules we enforce                          
         -->
-  <!-- 
================================================================================
 -->
-
-  <check level="error" class="org.scalastyle.file.FileTabChecker" 
enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.file.HeaderMatchesChecker" 
enabled="true">
-    <parameters>
-       <parameter name="header"><![CDATA[/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */]]></parameter>
-    </parameters>
-  </check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.SpacesAfterPlusChecker" 
enabled="true"></check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.SpacesBeforePlusChecker" 
enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" 
enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.file.FileLineLengthChecker" 
enabled="true">
-    <parameters>
-      <parameter name="maxLineLength"><![CDATA[100]]></parameter>
-      <parameter name="tabSize"><![CDATA[2]]></parameter>
-      <parameter name="ignoreImports">true</parameter>
-    </parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" 
enabled="true">
-    <parameters><parameter 
name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" 
enabled="true">
-    <parameters><parameter 
name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
-  </check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
-    <parameters><parameter 
name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters>
-  </check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
-    <parameters><parameter 
name="maxParameters"><![CDATA[10]]></parameter></parameters>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" 
enabled="true"></check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.CovariantEqualsChecker" 
enabled="true"></check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" 
enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.scalariform.IfBraceChecker" 
enabled="true">
-    <parameters>
-      <parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
-      <parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
-    </parameters>
-  </check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" 
enabled="true"></check>
-
-  <check level="error" class="org.scalastyle.file.NewLineAtEofChecker" 
enabled="true"></check>
-
-  <check customId="nonascii" level="error" 
class="org.scalastyle.scalariform.NonASCIICharacterChecker" 
enabled="true"></check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" 
enabled="true"></check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" 
enabled="true">
-   <parameters>
-     <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, 
LARROW, RARROW</parameter>
-   </parameters>
-  </check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" 
enabled="true">
-    <parameters>
-     <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, 
WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
-    </parameters>
-  </check>
-
-  <!-- ??? usually shouldn't be checked into the code base. -->
-  <check level="error" 
class="org.scalastyle.scalariform.NotImplementedErrorUsage" 
enabled="true"></check>
-
-  <!-- As of SPARK-7977 all printlns need to be wrapped in '// 
scalastyle:off/on println' -->
-  <check customId="println" level="error" 
class="org.scalastyle.scalariform.TokenChecker" enabled="true">
-    <parameters><parameter name="regex">^println$</parameter></parameters>
-    <customMessage><![CDATA[Are you sure you want to println? If yes, wrap the 
code block with
-      // scalastyle:off println
-      println(...)
-      // scalastyle:on println]]></customMessage>
-  </check>
-
-  <check customId="visiblefortesting" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter 
name="regex">@VisibleForTesting</parameter></parameters>
-    <customMessage><![CDATA[
-      @VisibleForTesting causes classpath issues. Please note this in the java 
doc instead (SPARK-11615).
-    ]]></customMessage>
-  </check>
-
-  <check customId="runtimeaddshutdownhook" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter 
name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
-    <customMessage><![CDATA[
-      Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In 
most cases, you should use
-      ShutdownHookManager.addShutdownHook instead.
-      If you must use Runtime.getRuntime.addShutdownHook, wrap the code block 
with
-      // scalastyle:off runtimeaddshutdownhook
-      Runtime.getRuntime.addShutdownHook(...)
-      // scalastyle:on runtimeaddshutdownhook
-    ]]></customMessage>
-  </check>
-
-  <check customId="mutablesynchronizedbuffer" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter 
name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
-    <customMessage><![CDATA[
-      Are you sure that you want to use mutable.SynchronizedBuffer? In most 
cases, you should use
-      java.util.concurrent.ConcurrentLinkedQueue instead.
-      If you must use mutable.SynchronizedBuffer, wrap the code block with
-      // scalastyle:off mutablesynchronizedbuffer
-      mutable.SynchronizedBuffer[...]
-      // scalastyle:on mutablesynchronizedbuffer
-    ]]></customMessage>
-  </check>
-
-  <check customId="classforname" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">Class\.forName</parameter></parameters>
-    <customMessage><![CDATA[
-      Are you sure that you want to use Class.forName? In most cases, you 
should use Utils.classForName instead.
-      If you must use Class.forName, wrap the code block with
-      // scalastyle:off classforname
-      Class.forName(...)
-      // scalastyle:on classforname
-    ]]></customMessage>
-  </check>
-
-  <check customId="awaitresult" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">Await\.result</parameter></parameters>
-    <customMessage><![CDATA[
-      Are you sure that you want to use Await.result? In most cases, you 
should use ThreadUtils.awaitResult instead.
-      If you must use Await.result, wrap the code block with
-      // scalastyle:off awaitresult
-      Await.result(...)
-      // scalastyle:on awaitresult
-    ]]></customMessage>
-  </check>
-
-  <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters 
-->
-  <check customId="javaconversions" level="error" 
class="org.scalastyle.scalariform.TokenChecker" enabled="true">
-    <parameters><parameter 
name="regex">JavaConversions</parameter></parameters>
-    <customMessage>Instead of importing implicits in 
scala.collection.JavaConversions._, import
-    scala.collection.JavaConverters._ and use .asScala / .asJava 
methods</customMessage>
-  </check>
-
-  <check customId="commonslang2" level="error" 
class="org.scalastyle.scalariform.TokenChecker" enabled="true">
-    <parameters><parameter 
name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
-    <customMessage>Use Commons Lang 3 classes (package 
org.apache.commons.lang3.*) instead
-    of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage>
-  </check>
-
-  <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" 
enabled="true">
-    <parameters>
-      <parameter name="groups">java,scala,3rdParty,spark</parameter>
-      <parameter name="group.java">javax?\..*</parameter>
-      <parameter name="group.scala">scala\..*</parameter>
-      <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter>
-      <parameter name="group.spark">org\.apache\.spark\..*</parameter>
-    </parameters>
-  </check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" 
enabled="true">
-    <parameters>
-      <parameter name="tokens">COMMA</parameter>
-    </parameters>
-  </check>
-
-  <!-- SPARK-3854: Single Space between ')' and '{' -->
-  <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">\)\{</parameter></parameters>
-    <customMessage><![CDATA[
-      Single Space between ')' and `{`.
-    ]]></customMessage>
-  </check>
-
-  <check customId="NoScalaDoc" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1  
[*]</parameter></parameters>
-    <customMessage>Use Javadoc style indentation for multiline 
comments</customMessage>
-  </check>
-
-  <check customId="OmitBracesInCase" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter 
name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
-    <customMessage>Omit braces in case clauses.</customMessage>
-  </check>
-
-  <!-- SPARK-16877: Avoid Java annotations -->
-  <check customId="OverrideJavaCase" level="error" 
class="org.scalastyle.scalariform.TokenChecker" enabled="true">
-    <parameters><parameter name="regex">^Override$</parameter></parameters>
-    <customMessage>override modifier should be used instead of 
@java.lang.Override.</customMessage>
-  </check>
-
-  <check level="error" 
class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check>
-
-  <!-- 
================================================================================
 -->
-  <!--       rules we'd like to enforce, but haven't cleaned up the codebase 
yet        -->
-  <!-- 
================================================================================
 -->
-
-  <!-- We cannot turn the following two on, because it'd fail a lot of string 
interpolation use cases. -->
-  <!-- Ideally the following two rules should be configurable to rule out 
string interpolation. -->
-  <check level="error" 
class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" 
enabled="false"></check>
-  <check level="error" 
class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" 
enabled="false"></check>
-
-  <!-- This breaks symbolic method names so we don't turn it on. -->
-  <!-- Maybe we should update it to allow basic symbolic names, and then we 
are good to go. -->
-  <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" 
enabled="false">
-    <parameters>
-    <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
-    </parameters>
-  </check>
-
-  <!-- Should turn this on, but we have a few places that need to be fixed 
first -->
-  <check level="error" 
class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
-
-  <!-- 
================================================================================
 -->
-  <!--                               rules we don't want                       
         -->
-  <!-- 
================================================================================
 -->
-
-  <check level="error" 
class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
-    <parameters><parameter 
name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters>
-  </check>
-
-  <!-- We want the opposite of this: NewLineAtEofChecker -->
-  <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" 
enabled="false"></check>
-
-  <!-- This one complains about all kinds of random things. Disable. -->
-  <check level="error" 
class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" 
enabled="false"></check>
-
-  <!-- We use return quite a bit for control flows and guards -->
-  <check level="error" class="org.scalastyle.scalariform.ReturnChecker" 
enabled="false"></check>
-
-  <!-- We use null a lot in low level code and to interface with 3rd party 
code -->
-  <check level="error" class="org.scalastyle.scalariform.NullChecker" 
enabled="false"></check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" 
enabled="false"></check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" class="org.scalastyle.file.FileLengthChecker" 
enabled="false">
-    <parameters><parameter name="maxFileLength">800></parameter></parameters>
-  </check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" 
enabled="false">
-    <parameters><parameter name="maxTypes">30</parameter></parameters>
-  </check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" 
class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
-    <parameters><parameter name="maximum">10</parameter></parameters>
-  </check>
-
-  <!-- Doesn't seem super big deal here ... -->
-  <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" 
enabled="false">
-    <parameters><parameter name="maxLength">50</parameter></parameters>
-  </check>
-
-  <!-- Not exactly feasible to enforce this right now. -->
-  <!-- It is also infrequent that somebody introduces a new class with a lot 
of methods. -->
-  <check level="error" 
class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
-    <parameters><parameter 
name="maxMethods"><![CDATA[30]]></parameter></parameters>
-  </check>
-
-  <!-- Doesn't seem super big deal here, and we have a lot of magic numbers 
... -->
-  <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" 
enabled="false">
-    <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
-  </check>
-
-</scalastyle>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
 
b/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
deleted file mode 100644
index cf10ed7..0000000
--- 
a/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.dataset;
-
-import hivemall.UDTFWithOptions;
-
-import java.lang.reflect.Field;
-import java.lang.reflect.Method;
-import java.util.Random;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.Options;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.Collector;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-
-/**
- * A wrapper of [[hivemall.dataset.LogisticRegressionDataGeneratorUDTF]]. This 
wrapper is needed
- * because Spark cannot handle HadoopUtils#getTaskId() correctly.
- */
-@Description(name = "lr_datagen",
-        value = "_FUNC_(options string) - Generates a logistic regression 
dataset")
-public final class LogisticRegressionDataGeneratorUDTFWrapper extends 
UDTFWithOptions {
-    private transient LogisticRegressionDataGeneratorUDTF udtf =
-            new LogisticRegressionDataGeneratorUDTF();
-
-    @Override
-    protected Options getOptions() {
-        Options options = null;
-        try {
-            Method m = udtf.getClass().getDeclaredMethod("getOptions");
-            m.setAccessible(true);
-            options = (Options) m.invoke(udtf);
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        return options;
-    }
-
-    @SuppressWarnings("all")
-    @Override
-    protected CommandLine processOptions(ObjectInspector[] objectInspectors)
-            throws UDFArgumentException {
-        CommandLine commands = null;
-        try {
-            Method m = udtf.getClass().getDeclaredMethod("processOptions");
-            m.setAccessible(true);
-            commands = (CommandLine) m.invoke(udtf, objectInspectors);
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        return commands;
-    }
-
-    @Override
-    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws 
UDFArgumentException {
-        try {
-            // Extract a collector for LogisticRegressionDataGeneratorUDTF
-            Field collector = GenericUDTF.class.getDeclaredField("collector");
-            collector.setAccessible(true);
-            udtf.setCollector((Collector) collector.get(this));
-
-            // To avoid HadoopUtils#getTaskId()
-            Class<?> clazz = udtf.getClass();
-            Field rnd1 = clazz.getDeclaredField("rnd1");
-            Field rnd2 = clazz.getDeclaredField("rnd2");
-            Field r_seed = clazz.getDeclaredField("r_seed");
-            r_seed.setAccessible(true);
-            final long seed = r_seed.getLong(udtf) + (int) 
Thread.currentThread().getId();
-            rnd1.setAccessible(true);
-            rnd2.setAccessible(true);
-            rnd1.set(udtf, new Random(seed));
-            rnd2.set(udtf, new Random(seed + 1));
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        return udtf.initialize(argOIs);
-    }
-
-    @Override
-    public void process(Object[] objects) throws HiveException {
-        udtf.process(objects);
-    }
-
-    @Override
-    public void close() throws HiveException {
-        udtf.close();
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java 
b/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
deleted file mode 100644
index b454fd9..0000000
--- a/spark/spark-common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-
-/**
- * A wrapper of [[hivemall.ftvec.AddBiasUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark 
cannot handle List<>
- * as a return type in Hive UDF. Therefore, the type must be passed via 
ObjectInspector.
- */
-@Description(name = "add_bias",
-        value = "_FUNC_(features in array<string>) - Returns features with a 
bias as array<string>")
-@UDFType(deterministic = true, stateful = false)
-public class AddBiasUDFWrapper extends GenericUDF {
-    private AddBiasUDF udf = new AddBiasUDF();
-    private ListObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "add_bias() has an single arguments: array<string> features");
-        }
-
-        switch (arguments[0].getCategory()) {
-            case LIST:
-                argumentOI = (ListObjectInspector) arguments[0];
-                ObjectInspector elmOI = 
argumentOI.getListElementObjectInspector();
-                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
-                    if (((PrimitiveObjectInspector) 
elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0, "Type mismatch: 
features");
-        }
-
-        return 
ObjectInspectorFactory.getStandardListObjectInspector(argumentOI.getListElementObjectInspector());
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        @SuppressWarnings("unchecked")
-        final List<String> input = (List<String>) 
argumentOI.getList(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "add_bias(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
 
b/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
deleted file mode 100644
index 0b687db..0000000
--- 
a/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * A wrapper of [[hivemall.ftvec.AddFeatureIndexUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark 
cannot handle List<>
- * as a return type in Hive UDF. Therefore, the type must be passed via 
ObjectInspector.
- */
-@Description(
-        name = "add_feature_index",
-        value = "_FUNC_(dense features in array<double>) - Returns a feature 
vector with feature indices")
-@UDFType(deterministic = true, stateful = false)
-public class AddFeatureIndexUDFWrapper extends GenericUDF {
-    private AddFeatureIndexUDF udf = new AddFeatureIndexUDF();
-    private ListObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "add_feature_index() has an single arguments: array<double> 
features");
-        }
-
-        switch (arguments[0].getCategory()) {
-            case LIST:
-                argumentOI = (ListObjectInspector) arguments[0];
-                ObjectInspector elmOI = 
argumentOI.getListElementObjectInspector();
-                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
-                    if (((PrimitiveObjectInspector) 
elmOI).getPrimitiveCategory() == PrimitiveCategory.DOUBLE) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0, "Type mismatch: 
features");
-        }
-
-        return 
ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        @SuppressWarnings("unchecked")
-        final List<Double> input = (List<Double>) 
argumentOI.getList(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "add_feature_index(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java 
b/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
deleted file mode 100644
index 5924468..0000000
--- 
a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * A wrapper of [[hivemall.ftvec.ExtractFeatureUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark 
cannot handle List<>
- * as a return type in Hive UDF. Therefore, the type must be passed via 
ObjectInspector.
- */
-@Description(name = "extract_feature",
-        value = "_FUNC_(feature in string) - Returns a parsed feature as 
string")
-@UDFType(deterministic = true, stateful = false)
-public class ExtractFeatureUDFWrapper extends GenericUDF {
-    private ExtractFeatureUDF udf = new ExtractFeatureUDF();
-    private PrimitiveObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "extract_feature() has an single arguments: string feature");
-        }
-
-        argumentOI = (PrimitiveObjectInspector) arguments[0];
-        if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
-            throw new UDFArgumentTypeException(0, "Type mismatch: feature");
-        }
-
-        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        final String input = (String) 
argumentOI.getPrimitiveJavaObject(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "extract_feature(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java 
b/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
deleted file mode 100644
index 8580247..0000000
--- 
a/spark/spark-common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * A wrapper of [[hivemall.ftvec.ExtractWeightUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark 
cannot handle List<>
- * as a return type in Hive UDF. Therefore, the type must be passed via 
ObjectInspector.
- */
-@Description(name = "extract_weight",
-        value = "_FUNC_(feature in string) - Returns the weight of a feature 
as string")
-@UDFType(deterministic = true, stateful = false)
-public class ExtractWeightUDFWrapper extends GenericUDF {
-    private ExtractWeightUDF udf = new ExtractWeightUDF();
-    private PrimitiveObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "extract_weight() has an single arguments: string feature");
-        }
-
-        argumentOI = (PrimitiveObjectInspector) arguments[0];
-        if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
-            throw new UDFArgumentTypeException(0, "Type mismatch: feature");
-        }
-
-        return 
PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.DOUBLE);
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        final String input = (String) 
argumentOI.getPrimitiveJavaObject(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "extract_weight(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java 
b/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
deleted file mode 100644
index 584be6c..0000000
--- 
a/spark/spark-common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec;
-
-import java.util.Arrays;
-import java.util.Map;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-
-/**
- * A wrapper of [[hivemall.ftvec.SortByFeatureUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark 
cannot handle Map<>
- * as a return type in Hive UDF. Therefore, the type must be passed via 
ObjectInspector.
- */
-@Description(name = "sort_by_feature",
-        value = "_FUNC_(map in map<int,float>) - Returns a sorted map")
-@UDFType(deterministic = true, stateful = false)
-public class SortByFeatureUDFWrapper extends GenericUDF {
-    private SortByFeatureUDF udf = new SortByFeatureUDF();
-    private MapObjectInspector argumentOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException(
-                "sorted_by_feature() has an single arguments: map<int, float> 
map");
-        }
-
-        switch (arguments[0].getCategory()) {
-            case MAP:
-                argumentOI = (MapObjectInspector) arguments[0];
-                ObjectInspector keyOI = argumentOI.getMapKeyObjectInspector();
-                ObjectInspector valueOI = 
argumentOI.getMapValueObjectInspector();
-                if (keyOI.getCategory().equals(Category.PRIMITIVE)
-                        && valueOI.getCategory().equals(Category.PRIMITIVE)) {
-                    final PrimitiveCategory keyCategory = 
((PrimitiveObjectInspector) keyOI).getPrimitiveCategory();
-                    final PrimitiveCategory valueCategory = 
((PrimitiveObjectInspector) valueOI).getPrimitiveCategory();
-                    if (keyCategory == PrimitiveCategory.INT
-                            && valueCategory == PrimitiveCategory.FLOAT) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0, "Type mismatch: map");
-        }
-
-
-        return ObjectInspectorFactory.getStandardMapObjectInspector(
-            argumentOI.getMapKeyObjectInspector(), 
argumentOI.getMapValueObjectInspector());
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        @SuppressWarnings("unchecked")
-        final Map<IntWritable, FloatWritable> input = (Map<IntWritable, 
FloatWritable>) argumentOI.getMap(arguments[0].get());
-        return udf.evaluate(input);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "sort_by_feature(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
 
b/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
deleted file mode 100644
index db533be..0000000
--- 
a/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.ftvec.scaling;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
-import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.io.Text;
-
-/**
- * A wrapper of [[hivemall.ftvec.scaling.L2NormalizationUDF]].
- *
- * NOTE: This is needed to avoid the issue of Spark reflection. That is, 
spark-1.3 cannot handle
- * List<> as a return type in Hive UDF. The type must be passed via 
ObjectInspector. This issues has
- * been reported in SPARK-6747, so a future release of Spark makes the wrapper 
obsolete.
- */
-public class L2NormalizationUDFWrapper extends GenericUDF {
-    private L2NormalizationUDF udf = new L2NormalizationUDF();
-
-    private transient List<Text> retValue = new ArrayList<Text>();
-    private transient Converter toListText = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
-        if (arguments.length != 1) {
-            throw new UDFArgumentLengthException("normalize() has an only 
single argument.");
-        }
-
-        switch (arguments[0].getCategory()) {
-            case LIST:
-                ObjectInspector elmOI = ((ListObjectInspector) 
arguments[0]).getListElementObjectInspector();
-                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
-                    if (((PrimitiveObjectInspector) 
elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0,
-                    "normalize() must have List[String] as an argument, but "
-                            + arguments[0].getTypeName() + " was found.");
-        }
-
-        // Create a ObjectInspector converter for arguments
-        ObjectInspector outputElemOI = 
ObjectInspectorFactory.getReflectionObjectInspector(
-            Text.class, ObjectInspectorOptions.JAVA);
-        ObjectInspector outputOI = 
ObjectInspectorFactory.getStandardListObjectInspector(outputElemOI);
-        toListText = ObjectInspectorConverters.getConverter(arguments[0], 
outputOI);
-
-        ObjectInspector listElemOI = 
PrimitiveObjectInspectorFactory.javaStringObjectInspector;
-        ObjectInspector returnElemOI = 
ObjectInspectorUtils.getStandardObjectInspector(listElemOI);
-        return 
ObjectInspectorFactory.getStandardListObjectInspector(returnElemOI);
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 1);
-        @SuppressWarnings("unchecked")
-        final List<Text> input = (List<Text>) 
toListText.convert(arguments[0].get());
-        retValue = udf.evaluate(input);
-        return retValue;
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "normalize(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java 
b/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
deleted file mode 100644
index d3bcbe6..0000000
--- a/spark/spark-common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.knn.lsh;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-
-/** A wrapper of [[hivemall.knn.lsh.MinHashesUDF]]. */
-@Description(
-        name = "minhashes",
-        value = "_FUNC_(features in array<string>, noWeight in boolean) - 
Returns hashed features as array<int>")
-@UDFType(deterministic = true, stateful = false)
-public class MinHashesUDFWrapper extends GenericUDF {
-    private MinHashesUDF udf = new MinHashesUDF();
-    private ListObjectInspector featuresOI = null;
-    private PrimitiveObjectInspector noWeightOI = null;
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
-        if (arguments.length != 2) {
-            throw new UDFArgumentLengthException(
-                "minhashes() has 2 arguments: array<string> features, boolean 
noWeight");
-        }
-
-        // Check argument types
-        switch (arguments[0].getCategory()) {
-            case LIST:
-                featuresOI = (ListObjectInspector) arguments[0];
-                ObjectInspector elmOI = 
featuresOI.getListElementObjectInspector();
-                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
-                    if (((PrimitiveObjectInspector) 
elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
-                        break;
-                    }
-                }
-            default:
-                throw new UDFArgumentTypeException(0, "Type mismatch: 
features");
-        }
-
-        noWeightOI = (PrimitiveObjectInspector) arguments[1];
-        if (noWeightOI.getPrimitiveCategory() != PrimitiveCategory.BOOLEAN) {
-            throw new UDFArgumentException("Type mismatch: noWeight");
-        }
-
-        return 
ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT));
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 2);
-        @SuppressWarnings("unchecked")
-        final List<String> features = (List<String>) 
featuresOI.getList(arguments[0].get());
-        final Boolean noWeight = 
PrimitiveObjectInspectorUtils.getBoolean(arguments[1].get(),
-            noWeightOI);
-        return udf.evaluate(features, noWeight);
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        /**
-         * TODO: Need to return hive-specific type names.
-         */
-        return "minhashes(" + Arrays.toString(children) + ")";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java 
b/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
deleted file mode 100644
index f386223..0000000
--- 
a/spark/spark-common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.tools.mapred;
-
-import java.util.UUID;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/** An alternative implementation of [[hivemall.tools.mapred.RowIdUDF]]. */
-@Description(
-        name = "rowid",
-        value = "_FUNC_() - Returns a generated row id of a form 
{TASK_ID}-{UUID}-{SEQUENCE_NUMBER}")
-@UDFType(deterministic = false, stateful = true)
-public class RowIdUDFWrapper extends GenericUDF {
-    // RowIdUDF is directly used because spark cannot
-    // handle HadoopUtils#getTaskId().
-
-    private long sequence;
-    private long taskId;
-
-    public RowIdUDFWrapper() {
-        this.sequence = 0L;
-        this.taskId = Thread.currentThread().getId();
-    }
-
-    @Override
-    public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
-        if (arguments.length != 0) {
-            throw new UDFArgumentLengthException("row_number() has no 
argument.");
-        }
-
-        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
-    }
-
-    @Override
-    public Object evaluate(DeferredObject[] arguments) throws HiveException {
-        assert (arguments.length == 0);
-        sequence++;
-        /**
-         * TODO: Check if it is unique over all tasks in executors of Spark.
-         */
-        return taskId + "-" + UUID.randomUUID() + "-" + sequence;
-    }
-
-    @Override
-    public String getDisplayString(String[] children) {
-        return "row_number()";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/scala/hivemall/HivemallException.scala
----------------------------------------------------------------------
diff --git a/spark/spark-common/src/main/scala/hivemall/HivemallException.scala 
b/spark/spark-common/src/main/scala/hivemall/HivemallException.scala
deleted file mode 100644
index 53f6756..0000000
--- a/spark/spark-common/src/main/scala/hivemall/HivemallException.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall
-
-class HivemallException(message: String, cause: Throwable)
-    extends Exception(message, cause) {
-
-  def this(message: String) = this(message, null)
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
----------------------------------------------------------------------
diff --git 
a/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
 
b/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
deleted file mode 100644
index 3fb2d18..0000000
--- 
a/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.spark.ml.feature
-
-import java.util.StringTokenizer
-
-import scala.collection.mutable.ListBuffer
-
-import hivemall.HivemallException
-
-// Used for DataFrame#explode
-case class HivemallFeature(feature: String)
-
-/**
- * Class that represents the features and labels of a data point for Hivemall.
- *
- * @param label Label for this data point.
- * @param features List of features for this data point.
- */
-case class HivemallLabeledPoint(label: Float = 0.0f, features: Seq[String]) {
-  override def toString: String = {
-    "%s,%s".format(label, features.mkString("[", ",", "]"))
-  }
-}
-
-object HivemallLabeledPoint {
-
-  // Simple parser for HivemallLabeledPoint
-  def parse(s: String): HivemallLabeledPoint = {
-    val (label, features) = s.indexOf(',') match {
-      case d if d > 0 => (s.substring(0, d), s.substring(d + 1))
-      case _ => ("0.0", "[]") // Dummy
-    }
-    HivemallLabeledPoint(label.toFloat, parseTuple(new 
StringTokenizer(features, "[],", true)))
-  }
-
-  // TODO: Support to parse rows without labels
-  private[this] def parseTuple(tokenizer: StringTokenizer): Seq[String] = {
-    val items = ListBuffer.empty[String]
-    var parsing = true
-    var allowDelim = false
-    while (parsing && tokenizer.hasMoreTokens()) {
-      val token = tokenizer.nextToken()
-      if (token == "[") {
-        items ++= parseTuple(tokenizer)
-        parsing = false
-        allowDelim = true
-      } else if (token == ",") {
-        if (allowDelim) {
-          allowDelim = false
-        } else {
-          throw new HivemallException("Found ',' at a wrong position.")
-        }
-      } else if (token == "]") {
-        parsing = false
-      } else {
-        items.append(token)
-        allowDelim = true
-      }
-    }
-    if (parsing) {
-      throw new HivemallException(s"A tuple must end with ']'.")
-    }
-    items
-  }
-}

[2/3] incubator-hivemall git commit: [SPARK] Avoid using -profile for Spark module and Make spark-2.0/2.1/2.2 and spark-common as the submodule of hivemall-spark

Reply via email to