[SPARK] Avoid using -profile for Spark module and Make spark-2.0/2.1/2.2 and spark-common as the submodule of hivemall-spark
Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/e2fb6f86 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/e2fb6f86 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/e2fb6f86 Branch: refs/heads/v0.5.0 Commit: e2fb6f8661ebbd34be7040ba3dd77d4522cd5a54 Parents: 0343700 Author: Makoto Yui <[email protected]> Authored: Mon Jan 22 19:59:52 2018 +0900 Committer: Makoto Yui <[email protected]> Committed: Mon Jan 22 19:59:52 2018 +0900 ---------------------------------------------------------------------- core/pom.xml | 41 --- mixserv/pom.xml | 29 -- nlp/pom.xml | 38 --- pom.xml | 166 ++++++--- spark/common/.cache-main | Bin 0 -> 12486 bytes spark/common/pom.xml | 65 ++++ ...isticRegressionDataGeneratorUDTFWrapper.java | 109 ++++++ .../java/hivemall/ftvec/AddBiasUDFWrapper.java | 83 +++++ .../ftvec/AddFeatureIndexUDFWrapper.java | 85 +++++ .../ftvec/ExtractFeatureUDFWrapper.java | 73 ++++ .../hivemall/ftvec/ExtractWeightUDFWrapper.java | 73 ++++ .../hivemall/ftvec/SortByFeatureUDFWrapper.java | 92 +++++ .../scaling/L2NormalizationUDFWrapper.java | 95 ++++++ .../hivemall/knn/lsh/MinHashesUDFWrapper.java | 93 ++++++ .../hivemall/tools/mapred/RowIdUDFWrapper.java | 72 ++++ .../main/scala/hivemall/HivemallException.scala | 25 ++ .../spark/ml/feature/HivemallLabeledPoint.scala | 82 +++++ spark/pom.xml | 159 +++++++++ spark/scalastyle-config.xml | 333 +++++++++++++++++++ spark/spark-2.0/pom.xml | 91 +---- .../spark/streaming/HivemallStreamingOps.scala | 47 +++ spark/spark-2.1/pom.xml | 89 +---- .../spark/streaming/HivemallStreamingOps.scala | 47 +++ spark/spark-2.2/pom.xml | 100 ++---- spark/spark-common/pom.xml | 146 -------- spark/spark-common/scalastyle-config.xml | 333 ------------------- ...isticRegressionDataGeneratorUDTFWrapper.java | 109 ------ .../java/hivemall/ftvec/AddBiasUDFWrapper.java | 83 ----- 
.../ftvec/AddFeatureIndexUDFWrapper.java | 85 ----- .../ftvec/ExtractFeatureUDFWrapper.java | 73 ---- .../hivemall/ftvec/ExtractWeightUDFWrapper.java | 73 ---- .../hivemall/ftvec/SortByFeatureUDFWrapper.java | 92 ----- .../scaling/L2NormalizationUDFWrapper.java | 95 ------ .../hivemall/knn/lsh/MinHashesUDFWrapper.java | 93 ------ .../hivemall/tools/mapred/RowIdUDFWrapper.java | 72 ---- .../main/scala/hivemall/HivemallException.scala | 25 -- .../spark/ml/feature/HivemallLabeledPoint.scala | 82 ----- .../spark/streaming/HivemallStreamingOps.scala | 47 --- xgboost/pom.xml | 34 -- 39 files changed, 1702 insertions(+), 1827 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/core/pom.xml ---------------------------------------------------------------------- diff --git a/core/pom.xml b/core/pom.xml index 05f6ec2..c87ded1 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -39,67 +39,41 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-core</artifactId> - <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> - <version>${hive.version}</version> <scope>provided</scope> - <exclusions> - <exclusion> - <artifactId>jetty</artifactId> - <groupId>org.mortbay.jetty</groupId> - </exclusion> - <exclusion> - <groupId>javax.jdo</groupId> - <artifactId>jdo2-api</artifactId> - </exclusion> - <exclusion> - <groupId>asm-parent</groupId> - <artifactId>asm-parent</artifactId> - </exclusion> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - </exclusions> </dependency> <dependency> <groupId>commons-cli</groupId> <artifactId>commons-cli</artifactId> - 
<version>1.2</version> <scope>provided</scope> </dependency> <dependency> <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> - <version>1.0.4</version> <scope>provided</scope> </dependency> <dependency> <groupId>log4j</groupId> <artifactId>log4j</artifactId> - <version>1.2.17</version> <scope>provided</scope> </dependency> <dependency> <groupId>javax.jdo</groupId> <artifactId>jdo2-api</artifactId> - <version>2.3-eb</version> <scope>provided</scope> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> - <version>${guava.version}</version> <scope>provided</scope> </dependency> @@ -159,25 +133,21 @@ <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> - <version>${junit.version}</version> <scope>test</scope> </dependency> <dependency> <groupId>org.mockito</groupId> <artifactId>mockito-core</artifactId> - <version>1.10.19</version> <scope>test</scope> </dependency> <dependency> <groupId>org.powermock</groupId> <artifactId>powermock-module-junit4</artifactId> - <version>1.6.3</version> <scope>test</scope> </dependency> <dependency> <groupId>org.powermock</groupId> <artifactId>powermock-api-mockito</artifactId> - <version>1.6.3</version> <scope>test</scope> </dependency> </dependencies> @@ -188,21 +158,10 @@ <finalName>${project.artifactId}-${project.version}</finalName> <testOutputDirectory>target/test-classes</testOutputDirectory> <plugins> - <!-- hivemall-core-xx.jar --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <version>2.5</version> - <configuration> - <finalName>${project.artifactId}-${project.version}</finalName> - <outputDirectory>${project.parent.build.directory}</outputDirectory> - </configuration> - </plugin> <!-- hivemall-core-xx-with-dependencies.jar including minimum dependencies --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> - <version>3.1.0</version> 
<executions> <execution> <id>jar-with-dependencies</id> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/mixserv/pom.xml ---------------------------------------------------------------------- diff --git a/mixserv/pom.xml b/mixserv/pom.xml index 13d9220..09b134f 100644 --- a/mixserv/pom.xml +++ b/mixserv/pom.xml @@ -39,49 +39,26 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-core</artifactId> - <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> - <version>${hive.version}</version> <scope>provided</scope> - <exclusions> - <exclusion> - <artifactId>jetty</artifactId> - <groupId>org.mortbay.jetty</groupId> - </exclusion> - <exclusion> - <groupId>javax.jdo</groupId> - <artifactId>jdo2-api</artifactId> - </exclusion> - <exclusion> - <groupId>asm-parent</groupId> - <artifactId>asm-parent</artifactId> - </exclusion> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - </exclusions> </dependency> <dependency> <groupId>javax.jdo</groupId> <artifactId>jdo2-api</artifactId> - <version>2.3-eb</version> <scope>provided</scope> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> - <version>${guava.version}</version> <scope>provided</scope> </dependency> @@ -102,19 +79,16 @@ <dependency> <groupId>commons-cli</groupId> <artifactId>commons-cli</artifactId> - <version>1.2</version> <scope>compile</scope> </dependency> <dependency> <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> - <version>1.0.4</version> <scope>compile</scope> </dependency> <dependency> <groupId>log4j</groupId> <artifactId>log4j</artifactId> - <version>1.2.17</version> 
<scope>compile</scope> </dependency> <dependency> @@ -129,13 +103,11 @@ <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> - <version>${junit.version}</version> <scope>test</scope> </dependency> <dependency> <groupId>org.mockito</groupId> <artifactId>mockito-all</artifactId> - <version>1.10.19</version> <scope>test</scope> </dependency> </dependencies> @@ -150,7 +122,6 @@ <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> - <version>3.1.0</version> <executions> <execution> <id>jar-with-dependencies</id> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/nlp/pom.xml ---------------------------------------------------------------------- diff --git a/nlp/pom.xml b/nlp/pom.xml index c667c4e..429de86 100644 --- a/nlp/pom.xml +++ b/nlp/pom.xml @@ -39,67 +39,41 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-core</artifactId> - <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> - <version>${hive.version}</version> <scope>provided</scope> - <exclusions> - <exclusion> - <artifactId>jetty</artifactId> - <groupId>org.mortbay.jetty</groupId> - </exclusion> - <exclusion> - <groupId>javax.jdo</groupId> - <artifactId>jdo2-api</artifactId> - </exclusion> - <exclusion> - <groupId>asm-parent</groupId> - <artifactId>asm-parent</artifactId> - </exclusion> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - </exclusions> </dependency> <dependency> <groupId>commons-cli</groupId> <artifactId>commons-cli</artifactId> - <version>1.2</version> <scope>provided</scope> </dependency> <dependency> <groupId>commons-logging</groupId> 
<artifactId>commons-logging</artifactId> - <version>1.0.4</version> <scope>provided</scope> </dependency> <dependency> <groupId>log4j</groupId> <artifactId>log4j</artifactId> - <version>1.2.17</version> <scope>provided</scope> </dependency> <dependency> <groupId>javax.jdo</groupId> <artifactId>jdo2-api</artifactId> - <version>2.3-eb</version> <scope>provided</scope> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> - <version>${guava.version}</version> <scope>provided</scope> </dependency> <dependency> @@ -127,7 +101,6 @@ <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> - <version>${junit.version}</version> <scope>test</scope> </dependency> <dependency> @@ -145,21 +118,10 @@ <finalName>${project.artifactId}-${project.version}</finalName> <testOutputDirectory>target/test-classes</testOutputDirectory> <plugins> - <!-- hivemall-nlp-xx.jar --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <version>2.5</version> - <configuration> - <finalName>${project.artifactId}-${project.version}</finalName> - <outputDirectory>${project.parent.build.directory}</outputDirectory> - </configuration> - </plugin> <!-- hivemall-nlp-xx-with-dependencies.jar including minimum dependencies --> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> - <version>3.1.0</version> <executions> <execution> <id>jar-with-dependencies</id> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 444e38a..cc3bdce 100644 --- a/pom.xml +++ b/pom.xml @@ -249,6 +249,7 @@ <module>nlp</module> <module>xgboost</module> <module>mixserv</module> + <module>spark</module> </modules> <properties> @@ -256,8 +257,6 @@ <java.target.version>1.7</java.target.version> <maven.compiler.source>1.7</maven.compiler.source> 
<maven.compiler.target>1.7</maven.compiler.target> - <scala.version>2.11.8</scala.version> - <scala.binary.version>2.11</scala.binary.version> <maven.build.timestamp.format>yyyy</maven.build.timestamp.format> <build.year>${maven.build.timestamp}</build.year> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> @@ -315,50 +314,6 @@ <profiles> <profile> - <id>spark-2.2</id> - <modules> - <module>spark/spark-2.2</module> - <module>spark/spark-common</module> - </modules> - <properties> - <spark.version>2.2.0</spark.version> - <spark.binary.version>2.2</spark.binary.version> - </properties> - <build> - <plugins> - <!-- Spark-2.2 only supports Java 8 --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-enforcer-plugin</artifactId> - <version>${maven-enforcer-plugin.version}</version> - <executions> - <execution> - <id>enforce-versions</id> - <phase>validate</phase> - <goals> - <goal>enforce</goal> - </goals> - <configuration> - <rules> - <requireProperty> - <property>java.source.version</property> - <regex>1.8</regex> - <regexMessage>When -Pspark-2.2 set, java.source.version must be 1.8</regexMessage> - </requireProperty> - <requireProperty> - <property>java.target.version</property> - <regex>1.8</regex> - <regexMessage>When -Pspark-2.2 set, java.target.version must be 1.8</regexMessage> - </requireProperty> - </rules> - </configuration> - </execution> - </executions> - </plugin> - </plugins> - </build> - </profile> - <profile> <id>spark-2.1</id> <modules> <module>spark/spark-2.1</module> @@ -409,6 +364,110 @@ </profile> </profiles> + <dependencyManagement> + <dependencies> + <!-- provided scope --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-core</artifactId> + <version>${hadoop.version}</version> + 
<scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-exec</artifactId> + <version>${hive.version}</version> + <scope>provided</scope> + <exclusions> + <exclusion> + <artifactId>jetty</artifactId> + <groupId>org.mortbay.jetty</groupId> + </exclusion> + <exclusion> + <groupId>javax.jdo</groupId> + <artifactId>jdo2-api</artifactId> + </exclusion> + <exclusion> + <groupId>asm-parent</groupId> + <artifactId>asm-parent</artifactId> + </exclusion> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>commons-cli</groupId> + <artifactId>commons-cli</artifactId> + <version>1.2</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + <version>1.0.4</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + <version>1.2.17</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>javax.jdo</groupId> + <artifactId>jdo2-api</artifactId> + <version>2.3-eb</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>${guava.version}</version> + <scope>provided</scope> + </dependency> + + <!-- test scope --> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-all</artifactId> + <version>1.10.19</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> + <version>1.10.19</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.powermock</groupId> + <artifactId>powermock-module-junit4</artifactId> + 
<version>1.6.3</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.powermock</groupId> + <artifactId>powermock-api-mockito</artifactId> + <version>1.6.3</version> + <scope>test</scope> + </dependency> + </dependencies> + </dependencyManagement> + <build> <directory>target</directory> <outputDirectory>target/classes</outputDirectory> @@ -418,6 +477,19 @@ <pluginManagement> <plugins> <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + <version>3.0.2</version> + <configuration> + <finalName>${project.artifactId}-${project.version}</finalName> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>3.1.0</version> + </plugin> + <plugin> <!-- mvn formatter:format --> <groupId>net.revelc.code</groupId> <artifactId>formatter-maven-plugin</artifactId> @@ -677,7 +749,7 @@ <failOnWarning>false</failOnWarning> <sourceDirectory>${basedir}/src/main/scala</sourceDirectory> <testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory> - <configLocation>spark/spark-common/scalastyle-config.xml</configLocation> + <configLocation>spark/scalastyle-config.xml</configLocation> <outputFile>${basedir}/target/scalastyle-output.xml</outputFile> <inputEncoding>${project.build.sourceEncoding}</inputEncoding> <outputEncoding>${project.reporting.outputEncoding}</outputEncoding> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/.cache-main ---------------------------------------------------------------------- diff --git a/spark/common/.cache-main b/spark/common/.cache-main new file mode 100644 index 0000000..a1b634c Binary files /dev/null and b/spark/common/.cache-main differ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/pom.xml ---------------------------------------------------------------------- diff --git a/spark/common/pom.xml b/spark/common/pom.xml 
new file mode 100644 index 0000000..164b89e --- /dev/null +++ b/spark/common/pom.xml @@ -0,0 +1,65 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.hivemall</groupId> + <artifactId>hivemall-spark</artifactId> + <version>0.5.1-incubating-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <artifactId>hivemall-spark-common</artifactId> + <name>Hivemall on Spark Common</name> + <packaging>jar</packaging> + + <properties> + <main.basedir>${project.parent.parent.basedir}</main.basedir> + </properties> + + <dependencies> + <!-- provided scope --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-core</artifactId> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-exec</artifactId> + <scope>provided</scope> 
+ </dependency> + + <!-- compile scope --> + <dependency> + <groupId>org.apache.hivemall</groupId> + <artifactId>hivemall-core</artifactId> + <scope>compile</scope> + </dependency> + </dependencies> + +</project> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java b/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java new file mode 100644 index 0000000..cf10ed7 --- /dev/null +++ b/spark/common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.dataset; + +import hivemall.UDTFWithOptions; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.Random; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.Collector; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; + +/** + * A wrapper of [[hivemall.dataset.LogisticRegressionDataGeneratorUDTF]]. This wrapper is needed + * because Spark cannot handle HadoopUtils#getTaskId() correctly. + */ +@Description(name = "lr_datagen", + value = "_FUNC_(options string) - Generates a logistic regression dataset") +public final class LogisticRegressionDataGeneratorUDTFWrapper extends UDTFWithOptions { + private transient LogisticRegressionDataGeneratorUDTF udtf = + new LogisticRegressionDataGeneratorUDTF(); + + @Override + protected Options getOptions() { + Options options = null; + try { + Method m = udtf.getClass().getDeclaredMethod("getOptions"); + m.setAccessible(true); + options = (Options) m.invoke(udtf); + } catch (Exception e) { + e.printStackTrace(); + } + return options; + } + + @SuppressWarnings("all") + @Override + protected CommandLine processOptions(ObjectInspector[] objectInspectors) + throws UDFArgumentException { + CommandLine commands = null; + try { + Method m = udtf.getClass().getDeclaredMethod("processOptions"); + m.setAccessible(true); + commands = (CommandLine) m.invoke(udtf, objectInspectors); + } catch (Exception e) { + e.printStackTrace(); + } + return commands; + } + + @Override + public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { + try { + // 
Extract a collector for LogisticRegressionDataGeneratorUDTF + Field collector = GenericUDTF.class.getDeclaredField("collector"); + collector.setAccessible(true); + udtf.setCollector((Collector) collector.get(this)); + + // To avoid HadoopUtils#getTaskId() + Class<?> clazz = udtf.getClass(); + Field rnd1 = clazz.getDeclaredField("rnd1"); + Field rnd2 = clazz.getDeclaredField("rnd2"); + Field r_seed = clazz.getDeclaredField("r_seed"); + r_seed.setAccessible(true); + final long seed = r_seed.getLong(udtf) + (int) Thread.currentThread().getId(); + rnd1.setAccessible(true); + rnd2.setAccessible(true); + rnd1.set(udtf, new Random(seed)); + rnd2.set(udtf, new Random(seed + 1)); + } catch (Exception e) { + e.printStackTrace(); + } + return udtf.initialize(argOIs); + } + + @Override + public void process(Object[] objects) throws HiveException { + udtf.process(objects); + } + + @Override + public void close() throws HiveException { + udtf.close(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java new file mode 100644 index 0000000..b454fd9 --- /dev/null +++ b/spark/common/src/main/java/hivemall/ftvec/AddBiasUDFWrapper.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.ftvec; + +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; + +/** + * A wrapper of [[hivemall.ftvec.AddBiasUDF]]. + * + * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<> + * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector. 
+ */ +@Description(name = "add_bias", + value = "_FUNC_(features in array<string>) - Returns features with a bias as array<string>") +@UDFType(deterministic = true, stateful = false) +public class AddBiasUDFWrapper extends GenericUDF { + private AddBiasUDF udf = new AddBiasUDF(); + private ListObjectInspector argumentOI = null; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "add_bias() has an single arguments: array<string> features"); + } + + switch (arguments[0].getCategory()) { + case LIST: + argumentOI = (ListObjectInspector) arguments[0]; + ObjectInspector elmOI = argumentOI.getListElementObjectInspector(); + if (elmOI.getCategory().equals(Category.PRIMITIVE)) { + if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) { + break; + } + } + default: + throw new UDFArgumentTypeException(0, "Type mismatch: features"); + } + + return ObjectInspectorFactory.getStandardListObjectInspector(argumentOI.getListElementObjectInspector()); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + assert (arguments.length == 1); + @SuppressWarnings("unchecked") + final List<String> input = (List<String>) argumentOI.getList(arguments[0].get()); + return udf.evaluate(input); + } + + @Override + public String getDisplayString(String[] children) { + return "add_bias(" + Arrays.toString(children) + ")"; + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java new file mode 100644 index 0000000..0b687db --- /dev/null +++ 
b/spark/common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.ftvec; + +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** + * A wrapper of [[hivemall.ftvec.AddFeatureIndexUDF]]. + * + * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<> + * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector. 
+ */ +@Description( + name = "add_feature_index", + value = "_FUNC_(dense features in array<double>) - Returns a feature vector with feature indices") +@UDFType(deterministic = true, stateful = false) +public class AddFeatureIndexUDFWrapper extends GenericUDF { + private AddFeatureIndexUDF udf = new AddFeatureIndexUDF(); + private ListObjectInspector argumentOI = null; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "add_feature_index() has an single arguments: array<double> features"); + } + + switch (arguments[0].getCategory()) { + case LIST: + argumentOI = (ListObjectInspector) arguments[0]; + ObjectInspector elmOI = argumentOI.getListElementObjectInspector(); + if (elmOI.getCategory().equals(Category.PRIMITIVE)) { + if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.DOUBLE) { + break; + } + } + default: + throw new UDFArgumentTypeException(0, "Type mismatch: features"); + } + + return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + assert (arguments.length == 1); + @SuppressWarnings("unchecked") + final List<Double> input = (List<Double>) argumentOI.getList(arguments[0].get()); + return udf.evaluate(input); + } + + @Override + public String getDisplayString(String[] children) { + return "add_feature_index(" + Arrays.toString(children) + ")"; + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java new file mode 100644 
index 0000000..5924468 --- /dev/null +++ b/spark/common/src/main/java/hivemall/ftvec/ExtractFeatureUDFWrapper.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.ftvec; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** + * A wrapper of [[hivemall.ftvec.ExtractFeatureUDF]]. + * + * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<> + * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector. 
+ */ +@Description(name = "extract_feature", + value = "_FUNC_(feature in string) - Returns a parsed feature as string") +@UDFType(deterministic = true, stateful = false) +public class ExtractFeatureUDFWrapper extends GenericUDF { + private ExtractFeatureUDF udf = new ExtractFeatureUDF(); + private PrimitiveObjectInspector argumentOI = null; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "extract_feature() has an single arguments: string feature"); + } + + argumentOI = (PrimitiveObjectInspector) arguments[0]; + if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) { + throw new UDFArgumentTypeException(0, "Type mismatch: feature"); + } + + return PrimitiveObjectInspectorFactory.javaStringObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + assert (arguments.length == 1); + final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get()); + return udf.evaluate(input); + } + + @Override + public String getDisplayString(String[] children) { + return "extract_feature(" + Arrays.toString(children) + ")"; + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java new file mode 100644 index 0000000..8580247 --- /dev/null +++ b/spark/common/src/main/java/hivemall/ftvec/ExtractWeightUDFWrapper.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.ftvec; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** + * A wrapper of [[hivemall.ftvec.ExtractWeightUDF]]. + * + * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle List<> + * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector. 
+ */ +@Description(name = "extract_weight", + value = "_FUNC_(feature in string) - Returns the weight of a feature as string") +@UDFType(deterministic = true, stateful = false) +public class ExtractWeightUDFWrapper extends GenericUDF { + private ExtractWeightUDF udf = new ExtractWeightUDF(); + private PrimitiveObjectInspector argumentOI = null; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "extract_weight() has an single arguments: string feature"); + } + + argumentOI = (PrimitiveObjectInspector) arguments[0]; + if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) { + throw new UDFArgumentTypeException(0, "Type mismatch: feature"); + } + + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.DOUBLE); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + assert (arguments.length == 1); + final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get()); + return udf.evaluate(input); + } + + @Override + public String getDisplayString(String[] children) { + return "extract_weight(" + Arrays.toString(children) + ")"; + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java new file mode 100644 index 0000000..584be6c --- /dev/null +++ b/spark/common/src/main/java/hivemall/ftvec/SortByFeatureUDFWrapper.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.ftvec; + +import java.util.Arrays; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; + +/** + * A wrapper of [[hivemall.ftvec.SortByFeatureUDF]]. + * + * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle Map<> + * as a return type in Hive UDF. Therefore, the type must be passed via ObjectInspector. 
+ */ +@Description(name = "sort_by_feature", + value = "_FUNC_(map in map<int,float>) - Returns a sorted map") +@UDFType(deterministic = true, stateful = false) +public class SortByFeatureUDFWrapper extends GenericUDF { + private SortByFeatureUDF udf = new SortByFeatureUDF(); + private MapObjectInspector argumentOI = null; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "sorted_by_feature() has an single arguments: map<int, float> map"); + } + + switch (arguments[0].getCategory()) { + case MAP: + argumentOI = (MapObjectInspector) arguments[0]; + ObjectInspector keyOI = argumentOI.getMapKeyObjectInspector(); + ObjectInspector valueOI = argumentOI.getMapValueObjectInspector(); + if (keyOI.getCategory().equals(Category.PRIMITIVE) + && valueOI.getCategory().equals(Category.PRIMITIVE)) { + final PrimitiveCategory keyCategory = ((PrimitiveObjectInspector) keyOI).getPrimitiveCategory(); + final PrimitiveCategory valueCategory = ((PrimitiveObjectInspector) valueOI).getPrimitiveCategory(); + if (keyCategory == PrimitiveCategory.INT + && valueCategory == PrimitiveCategory.FLOAT) { + break; + } + } + default: + throw new UDFArgumentTypeException(0, "Type mismatch: map"); + } + + + return ObjectInspectorFactory.getStandardMapObjectInspector( + argumentOI.getMapKeyObjectInspector(), argumentOI.getMapValueObjectInspector()); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + assert (arguments.length == 1); + @SuppressWarnings("unchecked") + final Map<IntWritable, FloatWritable> input = (Map<IntWritable, FloatWritable>) argumentOI.getMap(arguments[0].get()); + return udf.evaluate(input); + } + + @Override + public String getDisplayString(String[] children) { + return "sort_by_feature(" + Arrays.toString(children) + ")"; + } +} 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java b/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java new file mode 100644 index 0000000..db533be --- /dev/null +++ b/spark/common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.ftvec.scaling; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.Text; + +/** + * A wrapper of [[hivemall.ftvec.scaling.L2NormalizationUDF]]. + * + * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark-1.3 cannot handle + * List<> as a return type in Hive UDF. The type must be passed via ObjectInspector. This issue has + * been reported in SPARK-6747, so a future release of Spark makes the wrapper obsolete. 
+ */ +public class L2NormalizationUDFWrapper extends GenericUDF { + private L2NormalizationUDF udf = new L2NormalizationUDF(); + + private transient List<Text> retValue = new ArrayList<Text>(); + private transient Converter toListText = null; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException("normalize() has an only single argument."); + } + + switch (arguments[0].getCategory()) { + case LIST: + ObjectInspector elmOI = ((ListObjectInspector) arguments[0]).getListElementObjectInspector(); + if (elmOI.getCategory().equals(Category.PRIMITIVE)) { + if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) { + break; + } + } + default: + throw new UDFArgumentTypeException(0, + "normalize() must have List[String] as an argument, but " + + arguments[0].getTypeName() + " was found."); + } + + // Create a ObjectInspector converter for arguments + ObjectInspector outputElemOI = ObjectInspectorFactory.getReflectionObjectInspector( + Text.class, ObjectInspectorOptions.JAVA); + ObjectInspector outputOI = ObjectInspectorFactory.getStandardListObjectInspector(outputElemOI); + toListText = ObjectInspectorConverters.getConverter(arguments[0], outputOI); + + ObjectInspector listElemOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + ObjectInspector returnElemOI = ObjectInspectorUtils.getStandardObjectInspector(listElemOI); + return ObjectInspectorFactory.getStandardListObjectInspector(returnElemOI); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + assert (arguments.length == 1); + @SuppressWarnings("unchecked") + final List<Text> input = (List<Text>) toListText.convert(arguments[0].get()); + retValue = udf.evaluate(input); + return retValue; + } + + @Override + public String getDisplayString(String[] children) { + return "normalize(" + Arrays.toString(children) + 
")"; + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java b/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java new file mode 100644 index 0000000..d3bcbe6 --- /dev/null +++ b/spark/common/src/main/java/hivemall/knn/lsh/MinHashesUDFWrapper.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.knn.lsh; + +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; + +/** A wrapper of [[hivemall.knn.lsh.MinHashesUDF]]. */ +@Description( + name = "minhashes", + value = "_FUNC_(features in array<string>, noWeight in boolean) - Returns hashed features as array<int>") +@UDFType(deterministic = true, stateful = false) +public class MinHashesUDFWrapper extends GenericUDF { + private MinHashesUDF udf = new MinHashesUDF(); + private ListObjectInspector featuresOI = null; + private PrimitiveObjectInspector noWeightOI = null; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 2) { + throw new UDFArgumentLengthException( + "minhashes() has 2 arguments: array<string> features, boolean noWeight"); + } + + // Check argument types + switch (arguments[0].getCategory()) { + case LIST: + featuresOI = (ListObjectInspector) arguments[0]; + ObjectInspector elmOI = featuresOI.getListElementObjectInspector(); + if (elmOI.getCategory().equals(Category.PRIMITIVE)) { + if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) { + 
break; + } + } + default: + throw new UDFArgumentTypeException(0, "Type mismatch: features"); + } + + noWeightOI = (PrimitiveObjectInspector) arguments[1]; + if (noWeightOI.getPrimitiveCategory() != PrimitiveCategory.BOOLEAN) { + throw new UDFArgumentException("Type mismatch: noWeight"); + } + + return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT)); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + assert (arguments.length == 2); + @SuppressWarnings("unchecked") + final List<String> features = (List<String>) featuresOI.getList(arguments[0].get()); + final Boolean noWeight = PrimitiveObjectInspectorUtils.getBoolean(arguments[1].get(), + noWeightOI); + return udf.evaluate(features, noWeight); + } + + @Override + public String getDisplayString(String[] children) { + /** + * TODO: Need to return hive-specific type names. + */ + return "minhashes(" + Arrays.toString(children) + ")"; + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java b/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java new file mode 100644 index 0000000..f386223 --- /dev/null +++ b/spark/common/src/main/java/hivemall/tools/mapred/RowIdUDFWrapper.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.tools.mapred; + +import java.util.UUID; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** An alternative implementation of [[hivemall.tools.mapred.RowIdUDF]]. */ +@Description( + name = "rowid", + value = "_FUNC_() - Returns a generated row id of a form {TASK_ID}-{UUID}-{SEQUENCE_NUMBER}") +@UDFType(deterministic = false, stateful = true) +public class RowIdUDFWrapper extends GenericUDF { + // RowIdUDF is directly used because spark cannot + // handle HadoopUtils#getTaskId(). 
+ + private long sequence; + private long taskId; + + public RowIdUDFWrapper() { + this.sequence = 0L; + this.taskId = Thread.currentThread().getId(); + } + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 0) { + throw new UDFArgumentLengthException("row_number() has no argument."); + } + + return PrimitiveObjectInspectorFactory.javaStringObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + assert (arguments.length == 0); + sequence++; + /** + * TODO: Check if it is unique over all tasks in executors of Spark. + */ + return taskId + "-" + UUID.randomUUID() + "-" + sequence; + } + + @Override + public String getDisplayString(String[] children) { + return "row_number()"; + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/scala/hivemall/HivemallException.scala ---------------------------------------------------------------------- diff --git a/spark/common/src/main/scala/hivemall/HivemallException.scala b/spark/common/src/main/scala/hivemall/HivemallException.scala new file mode 100644 index 0000000..53f6756 --- /dev/null +++ b/spark/common/src/main/scala/hivemall/HivemallException.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall + +class HivemallException(message: String, cause: Throwable) + extends Exception(message, cause) { + + def this(message: String) = this(message, null) +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala ---------------------------------------------------------------------- diff --git a/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala b/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala new file mode 100644 index 0000000..3fb2d18 --- /dev/null +++ b/spark/common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.spark.ml.feature + +import java.util.StringTokenizer + +import scala.collection.mutable.ListBuffer + +import hivemall.HivemallException + +// Used for DataFrame#explode +case class HivemallFeature(feature: String) + +/** + * Class that represents the features and labels of a data point for Hivemall. + * + * @param label Label for this data point. + * @param features List of features for this data point. + */ +case class HivemallLabeledPoint(label: Float = 0.0f, features: Seq[String]) { + override def toString: String = { + "%s,%s".format(label, features.mkString("[", ",", "]")) + } +} + +object HivemallLabeledPoint { + + // Simple parser for HivemallLabeledPoint + def parse(s: String): HivemallLabeledPoint = { + val (label, features) = s.indexOf(',') match { + case d if d > 0 => (s.substring(0, d), s.substring(d + 1)) + case _ => ("0.0", "[]") // Dummy + } + HivemallLabeledPoint(label.toFloat, parseTuple(new StringTokenizer(features, "[],", true))) + } + + // TODO: Support to parse rows without labels + private[this] def parseTuple(tokenizer: StringTokenizer): Seq[String] = { + val items = ListBuffer.empty[String] + var parsing = true + var allowDelim = false + while (parsing && tokenizer.hasMoreTokens()) { + val token = tokenizer.nextToken() + if (token == "[") { + items ++= parseTuple(tokenizer) + parsing = false + allowDelim = true + } else if (token == ",") { + if (allowDelim) { + allowDelim = false + } else { + throw new HivemallException("Found ',' at a wrong position.") + } + } else if (token == "]") { + parsing = false + } else { + items.append(token) + allowDelim = true + } + } + if (parsing) { + throw new HivemallException(s"A tuple must end with ']'.") + } + items + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/pom.xml ---------------------------------------------------------------------- diff --git a/spark/pom.xml b/spark/pom.xml new file mode 100644 index 0000000..7db85ab --- /dev/null +++ 
b/spark/pom.xml @@ -0,0 +1,159 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.hivemall</groupId> + <artifactId>hivemall</artifactId> + <version>0.5.1-incubating-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <artifactId>hivemall-spark</artifactId> + <packaging>pom</packaging> + <name>Hivemall on Apache Spark</name> + + <modules> + <module>common</module> + <module>spark-2.0</module> + <module>spark-2.1</module> + <module>spark-2.2</module> + </modules> + + <properties> + <main.basedir>${project.parent.basedir}</main.basedir> + <scala.version>2.11.8</scala.version> + <scala.binary.version>2.11</scala.binary.version> + </properties> + + <dependencyManagement> + <dependencies> + <!-- compile scope --> + <dependency> + <groupId>org.apache.hivemall</groupId> + <artifactId>hivemall-core</artifactId> + <version>${project.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>org.apache.hivemall</groupId> + 
<artifactId>hivemall-xgboost</artifactId> + <version>${project.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + <version>1.8</version> + <scope>compile</scope> + </dependency> + + <!-- provided scope --> + <dependency> + <groupId>org.scala-lang</groupId> + <artifactId>scala-library</artifactId> + <version>${scala.version}</version> + <scope>provided</scope> + </dependency> + + <!-- test dependencies --> + <dependency> + <groupId>org.apache.hivemall</groupId> + <artifactId>hivemall-mixserv</artifactId> + <version>${project.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.xerial</groupId> + <artifactId>xerial-core</artifactId> + <version>3.2.3</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_${scala.binary.version}</artifactId> + <version>2.2.4</version> + <scope>test</scope> + </dependency> + </dependencies> + </dependencyManagement> + + <build> + <directory>target</directory> + <outputDirectory>target/classes</outputDirectory> + <finalName>${project.artifactId}-${project.version}</finalName> + <testOutputDirectory>target/test-classes</testOutputDirectory> + + <pluginManagement> + <plugins> + <plugin> + <groupId>net.alchim31.maven</groupId> + <artifactId>scala-maven-plugin</artifactId> + <version>3.2.2</version> + </plugin> + </plugins> + </pluginManagement> + + <plugins> + <plugin> + <groupId>net.alchim31.maven</groupId> + <artifactId>scala-maven-plugin</artifactId> + <executions> + <execution> + <id>scala-compile-first</id> + <phase>process-resources</phase> + <goals> + <goal>add-source</goal> + <goal>compile</goal> + </goals> + </execution> + <execution> + <id>scala-test-compile</id> + <phase>process-test-resources</phase> + <goals> + <goal>testCompile</goal> + </goals> + </execution> + </executions> + <!-- For incremental compilation --> + 
<configuration> + <scalaVersion>${scala.version}</scalaVersion> + <recompileMode>incremental</recompileMode> + <useZincServer>true</useZincServer> + <args> + <arg>-unchecked</arg> + <arg>-deprecation</arg> + <!-- TODO: To enable this option, we need to fix many warnings --> + <!-- <arg>-feature</arg> --> + </args> + <jvmArgs> + <jvmArg>-Xms512m</jvmArg> + <jvmArg>-Xmx1024m</jvmArg> + <jvmArg>-XX:PermSize=64m</jvmArg> + <jvmArg>-XX:MaxPermSize=512m</jvmArg> + <jvmArg>-XX:ReservedCodeCacheSize=512m</jvmArg> + </jvmArgs> + </configuration> + </plugin> + </plugins> + </build> + +</project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e2fb6f86/spark/scalastyle-config.xml ---------------------------------------------------------------------- diff --git a/spark/scalastyle-config.xml b/spark/scalastyle-config.xml new file mode 100644 index 0000000..13d1c47 --- /dev/null +++ b/spark/scalastyle-config.xml @@ -0,0 +1,333 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<!-- +If you wish to turn off checking for a section of code, you can put a comment in the source +before and after the section, with the following syntax: + + // scalastyle:off + ... 
// stuff that breaks the styles + // scalastyle:on + +You can also disable only one rule, by specifying its rule id, as specified in: + http://www.scalastyle.org/rules-0.7.0.html + + // scalastyle:off no.finalize + override def finalize(): Unit = ... + // scalastyle:on no.finalize + +This file is divided into 3 sections: + (1) rules that we enforce. + (2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet + (or we need to make the scalastyle rule more configurable). + (3) rules that we don't want to enforce. +--> + +<scalastyle> + <name>Scalastyle standard configuration</name> + + <!-- ================================================================================ --> + <!-- rules we enforce --> + <!-- ================================================================================ --> + + <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true"> + <parameters> + <parameter name="header"><![CDATA[/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */]]></parameter> + </parameters> + </check> + + <check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true"> + <parameters> + <parameter name="maxLineLength"><![CDATA[100]]></parameter> + <parameter name="tabSize"><![CDATA[2]]></parameter> + <parameter name="ignoreImports">true</parameter> + </parameters> + </check> + + <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true"> + <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters> + </check> + + <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true"> + <parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters> + </check> + + <check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true"> + <parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters> + </check> + + <check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true"> + <parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters> + </check> + + <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true"> + <parameters> + <parameter 
name="singleLineAllowed"><![CDATA[true]]></parameter> + <parameter name="doubleLineAllowed"><![CDATA[true]]></parameter> + </parameters> + </check> + + <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check> + + <check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check> + + <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true"> + <parameters> + <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter> + </parameters> + </check> + + <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true"> + <parameters> + <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter> + </parameters> + </check> + + <!-- ??? usually shouldn't be checked into the code base. --> + <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check> + + <!-- As of SPARK-7977 all printlns need to be wrapped in '// scalastyle:off/on println' --> + <check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true"> + <parameters><parameter name="regex">^println$</parameter></parameters> + <customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with + // scalastyle:off println + println(...) 
+ // scalastyle:on println]]></customMessage> + </check> + + <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">@VisibleForTesting</parameter></parameters> + <customMessage><![CDATA[ + @VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615). + ]]></customMessage> + </check> + + <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters> + <customMessage><![CDATA[ + Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use + ShutdownHookManager.addShutdownHook instead. + If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with + // scalastyle:off runtimeaddshutdownhook + Runtime.getRuntime.addShutdownHook(...) + // scalastyle:on runtimeaddshutdownhook + ]]></customMessage> + </check> + + <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters> + <customMessage><![CDATA[ + Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use + java.util.concurrent.ConcurrentLinkedQueue instead. + If you must use mutable.SynchronizedBuffer, wrap the code block with + // scalastyle:off mutablesynchronizedbuffer + mutable.SynchronizedBuffer[...] + // scalastyle:on mutablesynchronizedbuffer + ]]></customMessage> + </check> + + <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">Class\.forName</parameter></parameters> + <customMessage><![CDATA[ + Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead. 
+ If you must use Class.forName, wrap the code block with + // scalastyle:off classforname + Class.forName(...) + // scalastyle:on classforname + ]]></customMessage> + </check> + + <check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">Await\.result</parameter></parameters> + <customMessage><![CDATA[ + Are you sure that you want to use Await.result? In most cases, you should use ThreadUtils.awaitResult instead. + If you must use Await.result, wrap the code block with + // scalastyle:off awaitresult + Await.result(...) + // scalastyle:on awaitresult + ]]></customMessage> + </check> + + <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters --> + <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true"> + <parameters><parameter name="regex">JavaConversions</parameter></parameters> + <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import + scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage> + </check> + + <check customId="commonslang2" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true"> + <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters> + <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead + of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage> + </check> + + <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true"> + <parameters> + <parameter name="groups">java,scala,3rdParty,spark</parameter> + <parameter name="group.java">javax?\..*</parameter> + <parameter name="group.scala">scala\..*</parameter> + <parameter name="group.3rdParty">(?!org\.apache\.spark\.).*</parameter> + <parameter name="group.spark">org\.apache\.spark\..*</parameter> + </parameters> + </check> + + <check level="error" 
class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true"> + <parameters> + <parameter name="tokens">COMMA</parameter> + </parameters> + </check> + + <!-- SPARK-3854: Single Space between ')' and '{' --> + <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">\)\{</parameter></parameters> + <customMessage><![CDATA[ + Single Space between ')' and `{`. + ]]></customMessage> + </check> + + <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]</parameter></parameters> + <customMessage>Use Javadoc style indentation for multiline comments</customMessage> + </check> + + <check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters> + <customMessage>Omit braces in case clauses.</customMessage> + </check> + + <!-- SPARK-16877: Avoid Java annotations --> + <check customId="OverrideJavaCase" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true"> + <parameters><parameter name="regex">^Override$</parameter></parameters> + <customMessage>override modifier should be used instead of @java.lang.Override.</customMessage> + </check> + + <check level="error" class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check> + + <!-- ================================================================================ --> + <!-- rules we'd like to enforce, but haven't cleaned up the codebase yet --> + <!-- ================================================================================ --> + + <!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. 
--> + <!-- Ideally the following two rules should be configurable to rule out string interpolation. --> + <check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check> + <check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check> + + <!-- This breaks symbolic method names so we don't turn it on. --> + <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. --> + <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false"> + <parameters> + <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter> + </parameters> + </check> + + <!-- Should turn this on, but we have a few places that need to be fixed first --> + <check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check> + + <!-- ================================================================================ --> + <!-- rules we don't want --> + <!-- ================================================================================ --> + + <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false"> + <parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters> + </check> + + <!-- We want the opposite of this: NewLineAtEofChecker --> + <check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check> + + <!-- This one complains about all kinds of random things. Disable. 
--> + <check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check> + + <!-- We use return quite a bit for control flows and guards --> + <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="false"></check> + + <!-- We use null a lot in low level code and to interface with 3rd party code --> + <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check> + + <!-- Doesn't seem super big deal here ... --> + <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="false"></check> + + <!-- Doesn't seem super big deal here ... --> + <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false"> + <parameters><parameter name="maxFileLength">800</parameter></parameters> + </check> + + <!-- Doesn't seem super big deal here ... --> + <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false"> + <parameters><parameter name="maxTypes">30</parameter></parameters> + </check> + + <!-- Doesn't seem super big deal here ... --> + <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false"> + <parameters><parameter name="maximum">10</parameter></parameters> + </check> + + <!-- Doesn't seem super big deal here ... --> + <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false"> + <parameters><parameter name="maxLength">50</parameter></parameters> + </check> + + <!-- Not exactly feasible to enforce this right now. --> + <!-- It is also infrequent that somebody introduces a new class with a lot of methods. --> + <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false"> + <parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters> + </check> + + <!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... 
--> + <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false"> + <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters> + </check> + +</scalastyle>
