[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Branches:
  refs/heads/JIRA-22/pr-304 [deleted] 775ae4f79


[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Branches:
  refs/heads/JIRA-22/pr-356 [deleted] bb3250448


[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Branches:
  refs/heads/JIRA-22/pr-336 [deleted] f8d152cba


[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Branches:
  refs/heads/JIRA-22/pr-385 [deleted] 4c8dcbfcd


[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Tags:  refs/tags/v0.5-alpha.1 [deleted] 2a66cf620


[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Tags:  refs/tags/v0.4.2-rc.2 [deleted] e1df0504d


[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Branches:
  refs/heads/JIRA-22/pr-285 [deleted] 05766432c


[25/50] [abbrv] incubator-hivemall git commit: integrate chi2 and SNR into hivemall.spark

2016-12-01 Thread myui
integrate chi2 and SNR into hivemall.spark



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/a1f8f958
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/a1f8f958
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/a1f8f958

Branch: refs/heads/JIRA-22/pr-385
Commit: a1f8f958c99f3cde9e48b6d80d364004f6d98cc2
Parents: 22a608e
Author: amaya 
Authored: Tue Sep 27 15:58:33 2016 +0900
Committer: amaya 
Committed: Tue Sep 27 15:58:33 2016 +0900

--
 .../apache/spark/sql/hive/GroupedDataEx.scala   | 24 
 .../org/apache/spark/sql/hive/HivemallOps.scala | 19 ++
 .../spark/sql/hive/HivemallOpsSuite.scala   | 63 ++-
 .../org/apache/spark/sql/hive/HivemallOps.scala | 20 ++
 .../sql/hive/RelationalGroupedDatasetEx.scala   | 26 
 .../spark/sql/hive/HivemallOpsSuite.scala   | 65 +++-
 6 files changed, 212 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a1f8f958/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
--
diff --git 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
index 37d5423..2482c62 100644
--- 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
+++ 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
@@ -264,4 +264,28 @@ final class GroupedDataEx protected[sql](
   .toAggregateExpression()
 toDF((Alias(udaf, udaf.prettyString)() :: Nil).toSeq)
   }
+
+  /**
+   * @see hivemall.ftvec.selection.SignalNoiseRatioUDAF
+   */
+  def snr(X: String, Y: String): DataFrame = {
+val udaf = HiveUDAFFunction(
+  new HiveFunctionWrapper("hivemall.ftvec.selection.SignalNoiseRatioUDAF"),
+  Seq(X, Y).map(df.col(_).expr),
+  isUDAFBridgeRequired = false)
+  .toAggregateExpression()
+toDF(Seq(Alias(udaf, udaf.prettyString)()))
+  }
+
+  /**
+   * @see hivemall.tools.matrix.TransposeAndDotUDAF
+   */
+  def transpose_and_dot(X: String, Y: String): DataFrame = {
+val udaf = HiveUDAFFunction(
+  new HiveFunctionWrapper("hivemall.tools.matrix.TransposeAndDotUDAF"),
+  Seq(X, Y).map(df.col(_).expr),
+  isUDAFBridgeRequired = false)
+  .toAggregateExpression()
+toDF(Seq(Alias(udaf, udaf.prettyString)()))
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a1f8f958/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
--
diff --git 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
index 133f1d5..5970b83 100644
--- a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
+++ b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
@@ -1006,6 +1006,15 @@ object HivemallOps {
   }
 
   /**
+* @see hivemall.ftvec.selection.ChiSquareUDF
+* @group ftvec.selection
+*/
+  def chi2(exprs: Column*): Column = {
+HiveGenericUDF(new HiveFunctionWrapper(
+  "hivemall.ftvec.selection.ChiSquareUDF"), exprs.map(_.expr))
+  }
+
+  /**
* @see hivemall.ftvec.conv.ToDenseFeaturesUDF
* @group ftvec.conv
*/
@@ -1078,6 +1087,16 @@ object HivemallOps {
   }
 
   /**
+   * @see hivemall.tools.array.SelectKBestUDF
+   * @group tools.array
+   */
+  @scala.annotation.varargs
+  def select_k_best(exprs: Column*): Column = {
+HiveGenericUDF(new HiveFunctionWrapper(
+  "hivemall.tools.array.SelectKBestUDF"), exprs.map(_.expr))
+  }
+
+  /**
* @see hivemall.tools.math.SigmoidUDF
* @group misc
*/

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a1f8f958/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
--
diff --git 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 4be1e5e..148e5a2 100644
--- 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.hive
 
-import scala.collection.mutable.Seq
-
 import org.apache.spark.sql.{Column, Row}
 import org.apache.spark.sql.hive.HivemallOps._
 import org.apache.spark.sql.hive.HivemallUtils._
@@ -188,6 +186,22 @@ final class HivemallOpsSuite extends 

[43/50] [abbrv] incubator-hivemall git commit: Update license header

2016-12-01 Thread myui
Update license header



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/798ec6a7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/798ec6a7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/798ec6a7

Branch: refs/heads/JIRA-22/pr-336
Commit: 798ec6a73ca37d474137fc82db1c22a92521307d
Parents: ddd8dc2
Author: amaya 
Authored: Fri Nov 18 04:27:59 2016 +0900
Committer: amaya 
Committed: Fri Nov 18 04:27:59 2016 +0900

--
 systemtest/pom.xml | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/798ec6a7/systemtest/pom.xml
--
diff --git a/systemtest/pom.xml b/systemtest/pom.xml
index e7345af..0debee0 100644
--- a/systemtest/pom.xml
+++ b/systemtest/pom.xml
@@ -6,7 +6,9 @@
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
+
 http://www.apache.org/licenses/LICENSE-2.0
+
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY



[28/50] [abbrv] incubator-hivemall git commit: refine feature selection in spark integration

2016-12-01 Thread myui
refine feature selection in spark integration



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/1347de98
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/1347de98
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/1347de98

Branch: refs/heads/JIRA-22/pr-385
Commit: 1347de985ea6f8028c9d381f8827882ad39ad3a7
Parents: aa7d529
Author: amaya 
Authored: Wed Sep 28 14:22:05 2016 +0900
Committer: amaya 
Committed: Wed Sep 28 14:22:05 2016 +0900

--
 .../org/apache/spark/sql/hive/HivemallOps.scala |  9 +-
 .../spark/sql/hive/HivemallOpsSuite.scala   | 94 ++--
 .../org/apache/spark/sql/hive/HivemallOps.scala |  8 +-
 .../spark/sql/hive/HivemallOpsSuite.scala   | 89 --
 4 files changed, 138 insertions(+), 62 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1347de98/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
--
diff --git 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
index 41a4065..255f697 100644
--- a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
+++ b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
@@ -1006,9 +1006,9 @@ object HivemallOps {
 * @see hivemall.ftvec.selection.ChiSquareUDF
 * @group ftvec.selection
 */
-  def chi2(exprs: Column*): Column = {
+  def chi2(observed: Column, expected: Column): Column = {
 HiveGenericUDF(new HiveFunctionWrapper(
-  "hivemall.ftvec.selection.ChiSquareUDF"), exprs.map(_.expr))
+  "hivemall.ftvec.selection.ChiSquareUDF"), Seq(observed.expr, 
expected.expr))
   }
 
   /**
@@ -1087,10 +1087,9 @@ object HivemallOps {
* @see hivemall.tools.array.SelectKBestUDF
* @group tools.array
*/
-  @scala.annotation.varargs
-  def select_k_best(exprs: Column*): Column = {
+  def select_k_best(X: Column, importanceList: Column, k: Column): Column = {
 HiveGenericUDF(new HiveFunctionWrapper(
-  "hivemall.tools.array.SelectKBestUDF"), exprs.map(_.expr))
+  "hivemall.tools.array.SelectKBestUDF"), Seq(X.expr, importanceList.expr, 
k.expr))
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1347de98/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
--
diff --git 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index e118257..cce22ce 100644
--- 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -17,13 +17,14 @@
 
 package org.apache.spark.sql.hive
 
-import org.apache.spark.sql.{Column, Row}
 import org.apache.spark.sql.hive.HivemallOps._
 import org.apache.spark.sql.hive.HivemallUtils._
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Column, Row}
 import org.apache.spark.test.HivemallQueryTest
 import org.apache.spark.test.TestDoubleWrapper._
 import org.apache.spark.test.TestUtils._
+import org.scalatest.Matchers._
 
 final class HivemallOpsSuite extends HivemallQueryTest {
 
@@ -188,18 +189,32 @@ final class HivemallOpsSuite extends HivemallQueryTest {
 
   test("ftvec.selection - chi2") {
 import hiveContext.implicits._
-
-val df = Seq(Seq(
-  Seq(250.28, 170.93, 73.2, 12.196),
-  Seq(296.8, 138.53, 212.97, 66.3),
-  Seq(329.3, 148.7, 277.57, 101.28)) 
-> Seq(
-  Seq(292.1666753739119, 152.7455081467, 187.9893418327, 
59.9511948589),
-  Seq(292.1666753739119, 152.7455081467, 187.9893418327, 
59.9511948589),
-  Seq(292.1666753739119, 152.7455081467, 187.9893418327, 
59.9511948589))).toDF("arg0", "arg1")
-
-assert(df.select(chi2(df("arg0"), df("arg1"))).collect.toSet ===
-  Set(Row(Row(Seq(10.817820878493995, 3.5944990176817315, 
116.16984746363957, 67.24482558215503),
-Seq(0.004476514990225833, 0.16575416718561453, 0d, 
2.55351295663786e-15)
+implicit val doubleEquality = 
org.scalactic.TolerantNumerics.tolerantDoubleEquality(1e-5)
+
+// see also hivemall.ftvec.selection.ChiSquareUDFTest
+val df = Seq(
+  Seq(
+Seq(250.28, 170.93, 73.2, 12.196),
+Seq(296.8, 138.53, 212.97, 66.3),
+Seq(329.3999

[36/50] [abbrv] incubator-hivemall git commit: Mod README

2016-12-01 Thread myui
Mod README



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/ba912677
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/ba912677
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/ba912677

Branch: refs/heads/JIRA-22/pr-336
Commit: ba91267796cbfdee53aaef02af882aff591fb8f7
Parents: 43ca0c8
Author: amaya 
Authored: Thu Nov 17 14:15:03 2016 +0900
Committer: amaya 
Committed: Thu Nov 17 14:15:03 2016 +0900

--
 systemtest/README.md | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ba912677/systemtest/README.md
--
diff --git a/systemtest/README.md b/systemtest/README.md
index 4fca0c3..2805165 100644
--- a/systemtest/README.md
+++ b/systemtest/README.md
@@ -157,7 +157,7 @@ public class QuickExample {
 public void test3() throws Exception {
 // test on HiveRunner once only
 // auto matching by files which name is `test3` in `case/` and 
`answer/`
-team.set(HQ.autoMatchingByFileName("test3", ci)); // unordered test
+team.set(HQ.autoMatchingByFileName("test3"), ci); // unordered test
 team.run(); // this call is required
 }
 
@@ -165,7 +165,7 @@ public class QuickExample {
 public void test4() throws Exception {
 // test on HiveRunner once only
 predictor.expect(Throwable.class); // you can use systemtest w/ other 
rules 
-team.set(HQ.fromStatement("invalid queryyy")); // this query throws an 
exception
+team.set(HQ.fromStatement("invalid queryyy"), "never used"); // this 
query throws an exception
 team.run(); // this call is required
 // thrown exception will be caught by `ExpectedException` rule
 }
@@ -174,7 +174,7 @@ public class QuickExample {
 
 The above requires following files
 
-* `systemtest/src/test/resources/hivemall/HogeTest/init/color.tsv` 
(`systemtest/src/test/resources/${path/to/package}/${className}/init/${fileName}`)
+* `systemtest/src/test/resources/hivemall/QuickExample/init/color.tsv` 
(`systemtest/src/test/resources/${path/to/package}/${className}/init/${fileName}`)
 
 ```tsv
 blue   0   0   255
@@ -190,7 +190,7 @@ red 255 0   0
 pink   255 192 203
 ```
 
-* `systemtest/src/test/resources/hivemall/HogeTest/case/test3` 
(`systemtest/src/test/resources/${path/to/package}/${className}/case/${fileName}`)
+* `systemtest/src/test/resources/hivemall/QuickExample/case/test3` 
(`systemtest/src/test/resources/${path/to/package}/${className}/case/${fileName}`)
 
 ```sql
 -- write your hive queries
@@ -199,12 +199,12 @@ SELECT blue FROM color WHERE name = 'lavender';SELECT 
green FROM color WHERE nam
 SELECT name FROM color WHERE blue = 255
 ```
 
-* `systemtest/src/test/resources/hivemall/HogeTest/answer/test3` 
(`systemtest/src/test/resources/${path/to/package}/${className}/answer/${fileName}`)
+* `systemtest/src/test/resources/hivemall/QuickExample/answer/test3` 
(`systemtest/src/test/resources/${path/to/package}/${className}/answer/${fileName}`)
 
 tsv format is required
 
 ```tsv
-230
+250
 16569
 azurebluemagenta
 ```



[40/50] [abbrv] incubator-hivemall git commit: Fix process of tdprop

2016-12-01 Thread myui
Fix process of tdprop



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/144cb504
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/144cb504
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/144cb504

Branch: refs/heads/JIRA-22/pr-336
Commit: 144cb504d674d2509620ce0d315694be0f664f42
Parents: 3550fd3
Author: amaya 
Authored: Fri Nov 18 01:58:31 2016 +0900
Committer: amaya 
Committed: Fri Nov 18 01:58:31 2016 +0900

--
 .../systemtest/runner/TDSystemTestRunner.java   | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/144cb504/systemtest/src/main/java/hivemall/systemtest/runner/TDSystemTestRunner.java
--
diff --git 
a/systemtest/src/main/java/hivemall/systemtest/runner/TDSystemTestRunner.java 
b/systemtest/src/main/java/hivemall/systemtest/runner/TDSystemTestRunner.java
index 6d6c85b..87dd835 100644
--- 
a/systemtest/src/main/java/hivemall/systemtest/runner/TDSystemTestRunner.java
+++ 
b/systemtest/src/main/java/hivemall/systemtest/runner/TDSystemTestRunner.java
@@ -85,16 +85,20 @@ public class TDSystemTestRunner extends SystemTestRunner {
 fileUploadCommitRetryLimit = 
Integer.valueOf(props.getProperty("fileUploadCommitRetryLimit"));
 }
 
-final Properties TDPorps = System.getProperties();
+boolean fromPropertiesFile = false;
 for (Map.Entry e : props.entrySet()) {
-if (e.getKey().toString().startsWith("td.client.")) {
-TDPorps.setProperty(e.getKey().toString(), 
e.getValue().toString());
+final String key = e.getKey().toString();
+if (key.startsWith("td.client.")) {
+fromPropertiesFile = true;
+System.setProperty(key, e.getValue().toString());
 }
 }
-System.setProperties(TDPorps);
 
-client = System.getProperties().size() == TDPorps.size() ? 
TDClient.newClient() // use $HOME/.td/td.conf
-: TDClient.newBuilder(false).build(); // use *.properties
+if (fromPropertiesFile) {
+client = TDClient.newBuilder(false).build(); // use *.properties
+} else {
+client = TDClient.newClient(); // use $HOME/.td/td.conf
+}
 }
 
 @Override



[26/50] [abbrv] incubator-hivemall git commit: Merge 'master' into 'feature/feature_selection'

2016-12-01 Thread myui
Merge 'master' into 'feature/feature_selection'



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/aa7d5299
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/aa7d5299
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/aa7d5299

Branch: refs/heads/JIRA-22/pr-385
Commit: aa7d5299739349b49ef4f50cc2c1969f5cb8a78f
Parents: a1f8f95 bc8b015
Author: amaya 
Authored: Tue Sep 27 16:02:02 2016 +0900
Committer: amaya 
Committed: Tue Sep 27 16:02:02 2016 +0900

--
 README.md   |   7 +-
 core/pom.xml|   2 +-
 .../hivemall/ensemble/ArgminKLDistanceUDAF.java |   1 +
 .../main/java/hivemall/ensemble/MaxRowUDAF.java |  21 +-
 .../hivemall/ensemble/MaxValueLabelUDAF.java|   1 +
 .../hivemall/ensemble/bagging/VotedAvgUDAF.java |   1 +
 .../ensemble/bagging/WeightVotedAvgUDAF.java|   1 +
 .../main/java/hivemall/evaluation/AUCUDAF.java  |  37 +-
 .../evaluation/BinaryResponsesMeasures.java |  31 +-
 .../java/hivemall/evaluation/FMeasureUDAF.java  |   1 +
 .../evaluation/GradedResponsesMeasures.java |   7 +-
 .../evaluation/LogarithmicLossUDAF.java |   1 +
 .../main/java/hivemall/evaluation/MAPUDAF.java  |  55 +--
 .../main/java/hivemall/evaluation/MRRUDAF.java  |  55 +--
 .../evaluation/MeanAbsoluteErrorUDAF.java   |   1 +
 .../evaluation/MeanSquaredErrorUDAF.java|   1 +
 .../main/java/hivemall/evaluation/NDCGUDAF.java |  45 +--
 .../java/hivemall/evaluation/PrecisionUDAF.java |  55 +--
 .../main/java/hivemall/evaluation/R2UDAF.java   |   1 +
 .../java/hivemall/evaluation/RecallUDAF.java|  55 +--
 .../evaluation/RootMeanSquaredErrorUDAF.java|   1 +
 .../java/hivemall/fm/FMPredictGenericUDAF.java  |  23 +-
 .../hivemall/ftvec/binning/BuildBinsUDAF.java   |  45 ++-
 .../ftvec/binning/FeatureBinningUDF.java|  26 +-
 .../ftvec/binning/NumericHistogram.java |  28 +-
 .../ftvec/conv/ConvertToDenseModelUDAF.java |   1 +
 .../hivemall/ftvec/text/TermFrequencyUDAF.java  |   1 +
 .../ftvec/trans/OnehotEncodingUDAF.java | 335 +++
 .../smile/tools/RandomForestEnsembleUDAF.java   |   1 +
 .../tools/array/ArrayAvgGenericUDAF.java|  27 +-
 .../java/hivemall/tools/array/ArraySumUDAF.java |   1 +
 .../hivemall/tools/bits/BitsCollectUDAF.java|  23 +-
 .../main/java/hivemall/tools/map/UDAFToMap.java |  23 +-
 .../hivemall/tools/map/UDAFToOrderedMap.java|   6 +-
 .../java/hivemall/utils/hadoop/HiveUtils.java   |   9 +
 .../hivemall/utils/hadoop/WritableUtils.java|  15 +
 .../java/hivemall/utils/lang/Identifier.java|  38 ++-
 .../hive/ql/exec/MapredContextAccessor.java |   3 +
 .../ftvec/trans/TestBinarizeLabelUDTF.java  |   7 +-
 mixserv/pom.xml |   2 +-
 nlp/pom.xml |   2 +-
 .../hivemall/nlp/tokenizer/KuromojiUDFTest.java |  31 +-
 pom.xml |   1 +
 resources/ddl/define-all-as-permanent.hive  |   3 +
 resources/ddl/define-all.hive   |   3 +
 resources/ddl/define-udfs.td.hql|   1 +
 .../org/apache/spark/sql/hive/HivemallOps.scala |   5 +-
 .../apache/spark/sql/hive/HiveUdfSuite.scala|  36 ++
 .../spark/sql/hive/HivemallOpsSuite.scala   |  47 ++-
 .../sql/catalyst/expressions/EachTopK.scala | 108 ++
 .../org/apache/spark/sql/hive/HivemallOps.scala |  43 ++-
 .../apache/spark/sql/hive/HiveUdfSuite.scala|  43 ++-
 .../spark/sql/hive/HivemallOpsSuite.scala   |  70 ++--
 .../sql/hive/benchmark/MiscBenchmark.scala  |  72 ++--
 spark/spark-common/pom.xml  |   2 +-
 xgboost/pom.xml |   2 +-
 56 files changed, 1125 insertions(+), 338 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/aa7d5299/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
--
diff --cc core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
index 9272e60,91f1dfa..c752188
--- a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
+++ b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
@@@ -55,9 -55,9 +55,10 @@@ import org.apache.hadoop.hive.serde2.ob
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
  import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
  import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
  import 
org.apache.hadoop.hive.serde2.objectinspector.

[30/50] [abbrv] incubator-hivemall git commit: mod SNR for corner cases

2016-12-01 Thread myui
mod SNR for corner cases



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/4cfa4e5a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/4cfa4e5a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/4cfa4e5a

Branch: refs/heads/JIRA-22/pr-385
Commit: 4cfa4e5ac15a6535b187c23616c205696a1cd13b
Parents: 8e2842c
Author: amaya 
Authored: Wed Sep 28 18:26:01 2016 +0900
Committer: amaya 
Committed: Wed Sep 28 18:29:28 2016 +0900

--
 .../ftvec/selection/SignalNoiseRatioUDAF.java   |  48 +--
 .../selection/SignalNoiseRatioUDAFTest.java | 135 ++-
 2 files changed, 167 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/4cfa4e5a/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
--
diff --git 
a/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java 
b/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
index b7b9126..507aefa 100644
--- a/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
+++ b/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
@@ -21,7 +21,6 @@ package hivemall.ftvec.selection;
 import hivemall.utils.hadoop.HiveUtils;
 import hivemall.utils.hadoop.WritableUtils;
 import hivemall.utils.lang.Preconditions;
-import org.apache.commons.math3.util.FastMath;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
@@ -193,7 +192,7 @@ public class SignalNoiseRatioUDAF extends 
AbstractGenericUDAFResolver {
 
 int clazz = -1;
 for (int i = 0; i < nClasses; i++) {
-int label = 
PrimitiveObjectInspectorUtils.getInt(labels.get(i), labelOI);
+final int label = 
PrimitiveObjectInspectorUtils.getInt(labels.get(i), labelOI);
 if (label == 1 && clazz == -1) {
 clazz = i;
 } else if (label == 1) {
@@ -255,6 +254,12 @@ public class SignalNoiseRatioUDAF extends 
AbstractGenericUDAFResolver {
 for (int i = 0; i < nClasses; i++) {
 final long n = myAgg.ns[i];
 final long m = 
PrimitiveObjectInspectorUtils.getLong(ns.get(i), nOI);
+
+// no need to merge class `i`
+if (m == 0) {
+continue;
+}
+
 final List means = meansOI.getList(meanss.get(i));
 final List variances = variancesOI.getList(variancess.get(i));
 
@@ -266,10 +271,19 @@ public class SignalNoiseRatioUDAF extends 
AbstractGenericUDAFResolver {
 final double varianceN = myAgg.variancess[i][j];
 final double varianceM = 
PrimitiveObjectInspectorUtils.getDouble(
 variances.get(j), varianceOI);
-myAgg.meanss[i][j] = (n * meanN + m * meanM) / (double) (n 
+ m);
-myAgg.variancess[i][j] = (varianceN * (n - 1) + varianceM 
* (m - 1) + FastMath.pow(
-meanN - meanM, 2) * n * m / (n + m))
-/ (n + m - 1);
+
+if (n == 0) {
+// only assign `other` into `myAgg`
+myAgg.meanss[i][j] = meanM;
+myAgg.variancess[i][j] = varianceM;
+} else {
+// merge by Chan's method
+// 
http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf
+myAgg.meanss[i][j] = (n * meanN + m * meanM) / 
(double) (n + m);
+myAgg.variancess[i][j] = (varianceN * (n - 1) + 
varianceM * (m - 1) + Math.pow(
+meanN - meanM, 2) * n * m / (n + m))
+/ (n + m - 1);
+}
 }
 }
 }
@@ -302,25 +316,33 @@ public class SignalNoiseRatioUDAF extends 
AbstractGenericUDAFResolver {
 
 // calc SNR between classes each feature
 final double[] result = new double[nFeatures];
-final double[] sds = new double[nClasses]; // memo
+final double[] sds = new double[nClasses]; // for memorization
 for (int i = 0; i < nFeatures; i++) {
-sds[0] = FastMath.sqrt(myAgg.variancess[0][i]);
+sds[0] = Math.sqrt(myAgg.variancess[0][i]);
 for (int j = 1; j < nClasses; j++) {
-sds[j] = FastMath.sqrt(myAgg.variancess[j][i]);
-if (Double.isNaN(sds[j])) {
+

[14/50] [abbrv] incubator-hivemall git commit: Revert some modifications

2016-12-01 Thread myui
Revert some modifications


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/3620eb89
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/3620eb89
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/3620eb89

Branch: refs/heads/JIRA-22/pr-285
Commit: 3620eb89993db22ce8aee924d3cc0df33a5f9618
Parents: f81948c
Author: Takeshi YAMAMURO 
Authored: Wed Sep 21 01:52:22 2016 +0900
Committer: Takeshi YAMAMURO 
Committed: Wed Sep 21 01:55:59 2016 +0900

--
 .../src/main/java/hivemall/LearnerBaseUDTF.java |  33 ++
 .../hivemall/classifier/AROWClassifierUDTF.java |   2 +-
 .../hivemall/classifier/AdaGradRDAUDTF.java | 125 +++-
 .../classifier/BinaryOnlineClassifierUDTF.java  |  10 +
 .../classifier/GeneralClassifierUDTF.java   |   1 +
 .../classifier/PassiveAggressiveUDTF.java   |   2 +-
 .../main/java/hivemall/model/DenseModel.java|  86 -
 .../main/java/hivemall/model/NewDenseModel.java | 293 +
 .../model/NewSpaceEfficientDenseModel.java  | 317 +++
 .../java/hivemall/model/NewSparseModel.java | 197 
 .../java/hivemall/model/PredictionModel.java|   3 +
 .../model/SpaceEfficientDenseModel.java |  92 +-
 .../main/java/hivemall/model/SparseModel.java   |  19 +-
 .../model/SynchronizedModelWrapper.java |   6 +
 .../hivemall/regression/AROWRegressionUDTF.java |   2 +-
 .../java/hivemall/regression/AdaDeltaUDTF.java  | 118 ++-
 .../java/hivemall/regression/AdaGradUDTF.java   | 119 ++-
 .../regression/GeneralRegressionUDTF.java   |   1 +
 .../java/hivemall/regression/LogressUDTF.java   |  65 +++-
 .../PassiveAggressiveRegressionUDTF.java|   2 +-
 .../hivemall/regression/RegressionBaseUDTF.java |  12 +-
 .../NewSpaceEfficientNewDenseModelTest.java |  60 
 .../model/SpaceEfficientDenseModelTest.java |  60 
 .../java/hivemall/mix/server/MixServerTest.java |  14 +-
 .../hivemall/mix/server/MixServerSuite.scala|   4 +-
 25 files changed, 1512 insertions(+), 131 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3620eb89/core/src/main/java/hivemall/LearnerBaseUDTF.java
--
diff --git a/core/src/main/java/hivemall/LearnerBaseUDTF.java 
b/core/src/main/java/hivemall/LearnerBaseUDTF.java
index 7fd5190..4cf3c7f 100644
--- a/core/src/main/java/hivemall/LearnerBaseUDTF.java
+++ b/core/src/main/java/hivemall/LearnerBaseUDTF.java
@@ -25,6 +25,9 @@ import hivemall.model.DenseModel;
 import hivemall.model.PredictionModel;
 import hivemall.model.SpaceEfficientDenseModel;
 import hivemall.model.SparseModel;
+import hivemall.model.NewDenseModel;
+import hivemall.model.NewSparseModel;
+import hivemall.model.NewSpaceEfficientDenseModel;
 import hivemall.model.SynchronizedModelWrapper;
 import hivemall.model.WeightValue;
 import hivemall.model.WeightValue.WeightValueWithCovar;
@@ -199,6 +202,36 @@ public abstract class LearnerBaseUDTF extends 
UDTFWithOptions {
 return model;
 }
 
+protected PredictionModel createNewModel(String label) {
+PredictionModel model;
+final boolean useCovar = useCovariance();
+if (dense_model) {
+if (disable_halffloat == false && model_dims > 16777216) {
+logger.info("Build a space efficient dense model with " + 
model_dims
++ " initial dimensions" + (useCovar ? " w/ 
covariances" : ""));
+model = new NewSpaceEfficientDenseModel(model_dims, useCovar);
+} else {
+logger.info("Build a dense model with initial with " + 
model_dims
++ " initial dimensions" + (useCovar ? " w/ 
covariances" : ""));
+model = new NewDenseModel(model_dims, useCovar);
+}
+} else {
+int initModelSize = getInitialModelSize();
+logger.info("Build a sparse model with initial with " + 
initModelSize
++ " initial dimensions");
+model = new NewSparseModel(initModelSize, useCovar);
+}
+if (mixConnectInfo != null) {
+model.configureClock();
+model = new SynchronizedModelWrapper(model);
+MixClient client = configureMixClient(mixConnectInfo, label, 
model);
+model.configureMix(client, mixCancel);
+this.mixClient = client;
+}
+assert (model != null);
+return model;
+}
+
 // If a model implements a optimizer, it must override this
 protected Map getOptimzierOptions() {
 return null;

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3620eb89/core/src/main/java/hivemall/classifier/AROWCl

[39/50] [abbrv] incubator-hivemall git commit: Mod assert methods

2016-12-01 Thread myui
Mod assert methods



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/3550fd30
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/3550fd30
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/3550fd30

Branch: refs/heads/JIRA-22/pr-336
Commit: 3550fd30af3a01f4217c075a3b814952b406aebe
Parents: 1f3df54
Author: amaya 
Authored: Fri Nov 18 01:57:47 2016 +0900
Committer: amaya 
Committed: Fri Nov 18 01:57:47 2016 +0900

--
 .../main/java/hivemall/systemtest/runner/SystemTestRunner.java | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3550fd30/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java
--
diff --git 
a/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java 
b/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java
index 77091f2..f16da90 100644
--- a/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java
+++ b/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java
@@ -195,12 +195,10 @@ public abstract class SystemTestRunner extends 
ExternalResource {
 
 if (ordered) {
 // take order into consideration (like list)
-Assert.assertThat(Arrays.asList(answer.split(IO.RD)),
-Matchers.contains(result.toArray()));
+Assert.assertThat(result, Matchers.contains(answer.split(IO.RD)));
 } else {
 // not take order into consideration (like multiset)
-Assert.assertThat(Arrays.asList(answer.split(IO.RD)),
-Matchers.containsInAnyOrder(result.toArray()));
+Assert.assertThat(result, 
Matchers.containsInAnyOrder(answer.split(IO.RD)));
 }
 }
 



[23/50] [abbrv] incubator-hivemall git commit: add snr

2016-12-01 Thread myui
add snr



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/22a608ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/22a608ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/22a608ee

Branch: refs/heads/JIRA-22/pr-385
Commit: 22a608ee1c7239b2953183b5341f80c58b1e7045
Parents: 5088ef3
Author: amaya 
Authored: Mon Sep 26 17:07:55 2016 +0900
Committer: amaya 
Committed: Mon Sep 26 17:15:22 2016 +0900

--
 .../ftvec/selection/SignalNoiseRatioUDAF.java   | 327 +++
 .../selection/SignalNoiseRatioUDAFTest.java | 174 ++
 resources/ddl/define-all-as-permanent.hive  |   3 +
 resources/ddl/define-all.hive   |   3 +
 resources/ddl/define-all.spark  |   3 +
 resources/ddl/define-udfs.td.hql|   1 +
 6 files changed, 511 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/22a608ee/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
--
diff --git 
a/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java 
b/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
new file mode 100644
index 000..b7b9126
--- /dev/null
+++ b/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
@@ -0,0 +1,327 @@
+/*
+ * Hivemall: Hive scalable Machine Learning Library
+ *
+ * Copyright (C) 2015 Makoto YUI
+ * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science 
and Technology (AIST)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package hivemall.ftvec.selection;
+
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.hadoop.WritableUtils;
+import hivemall.utils.lang.Preconditions;
+import org.apache.commons.math3.util.FastMath;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+@Description(name = "snr", value = "_FUNC_(array features, array 
one-hot class label)"
++ " - Returns SNR values of each feature as array")
+public class SignalNoiseRatioUDAF extends AbstractGenericUDAFResolver {
+@Override
+public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info)
+throws SemanticException {
+final ObjectInspector[] OIs = info.getParameterObjectInspectors();
+
+if (OIs.length != 2) {
+throw new UDFArgumentLengthException("Specify two arguments.");
+}
+
+if (!HiveUtils.isNumberListOI(OIs[0])) {
+throw new UDFArgumentTypeException(0,
+"Only array type argument is acceptable but " + 
OIs[0].getTypeName()
++ " was passed as `features`");
+}
+
+if (!HiveUtils.isListOI(OIs[1])
+   

[22/50] [abbrv] incubator-hivemall git commit: Implement initial SST-based change-point detector

2016-12-01 Thread myui
Implement initial SST-based change-point detector


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/3ebd771e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/3ebd771e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/3ebd771e

Branch: refs/heads/JIRA-22/pr-356
Commit: 3ebd771ee4bebf14769b7c240f8b28b9d5d10e86
Parents: 89ec56e
Author: Takuya Kitazawa 
Authored: Mon Sep 26 17:12:01 2016 +0900
Committer: Takuya Kitazawa 
Committed: Mon Sep 26 17:12:01 2016 +0900

--
 .../java/hivemall/anomaly/SSTChangePoint.java   | 118 +++
 .../hivemall/anomaly/SSTChangePointUDF.java | 197 +++
 .../hivemall/anomaly/SSTChangePointTest.java| 111 +++
 3 files changed, 426 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3ebd771e/core/src/main/java/hivemall/anomaly/SSTChangePoint.java
--
diff --git a/core/src/main/java/hivemall/anomaly/SSTChangePoint.java 
b/core/src/main/java/hivemall/anomaly/SSTChangePoint.java
new file mode 100644
index 0000000..e693bd4
--- /dev/null
+++ b/core/src/main/java/hivemall/anomaly/SSTChangePoint.java
@@ -0,0 +1,118 @@
+/*
+ * Hivemall: Hive scalable Machine Learning Library
+ *
+ * Copyright (C) 2015 Makoto YUI
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package hivemall.anomaly;
+
+import hivemall.anomaly.SSTChangePointUDF.SSTChangePointInterface;
+import hivemall.anomaly.SSTChangePointUDF.Parameters;
+import hivemall.utils.collections.DoubleRingBuffer;
+import org.apache.commons.math3.linear.MatrixUtils;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.SingularValueDecomposition;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+import java.util.Arrays;
+
+import javax.annotation.Nonnull;
+
+final class SSTChangePoint implements SSTChangePointInterface {
+
+@Nonnull
+private final PrimitiveObjectInspector oi;
+
+@Nonnull
+private final int window;
+@Nonnull
+private final int nPastWindow;
+@Nonnull
+private final int nCurrentWindow;
+@Nonnull
+private final int pastSize;
+@Nonnull
+private final int currentSize;
+@Nonnull
+private final int currentOffset;
+@Nonnull
+private final int r;
+
+@Nonnull
+private final DoubleRingBuffer xRing;
+@Nonnull
+private final double[] xSeries;
+
+SSTChangePoint(@Nonnull Parameters params, @Nonnull 
PrimitiveObjectInspector oi) {
+this.oi = oi;
+
+this.window = params.w;
+this.nPastWindow = params.n;
+this.nCurrentWindow = params.m;
+this.pastSize = window + nPastWindow;
+this.currentSize = window + nCurrentWindow;
+this.currentOffset = params.g;
+this.r = params.r;
+
+// (w + n) past samples for the n-past-windows
+// (w + m) current samples for the m-current-windows, starting from 
offset g
+// => need to hold past (w + n + g + w + m) samples from the latest 
sample
+int holdSampleSize = pastSize + currentOffset + currentSize;
+
+this.xRing = new DoubleRingBuffer(holdSampleSize);
+this.xSeries = new double[holdSampleSize];
+}
+
+@Override
+public void update(@Nonnull final Object arg, @Nonnull final double[] 
outScores)
+throws HiveException {
+double x = PrimitiveObjectInspectorUtils.getDouble(arg, oi);
+xRing.add(x).toArray(xSeries, true /* FIFO */);
+
+// need to wait until the buffer is filled
+if (!xRing.isFull()) {
+outScores[0]  = 0.d;
+} else {
+outScores[0] = computeScore();
+}
+}
+
+private double computeScore() {
+// create past trajectory matrix and find its left singular vectors
+RealMatrix H = MatrixUtils.createRealMatrix(window, nPastWindow);
+for (int i = 0; i < nPastWindow; i++) {
+H.setColumn(i, Arrays.copyOfRange(xSeries, i, i + window));
+}
+ 

[21/50] [abbrv] incubator-hivemall git commit: add tests

2016-12-01 Thread myui
add tests



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/5088ef36
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/5088ef36
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/5088ef36

Branch: refs/heads/JIRA-22/pr-385
Commit: 5088ef36367df1cd51ae62f1c044933676975e2e
Parents: a882c5f
Author: amaya 
Authored: Wed Sep 21 16:22:09 2016 +0900
Committer: amaya 
Committed: Wed Sep 21 18:00:35 2016 +0900

--
 .../tools/matrix/TransposeAndDotUDAF.java   |  2 +-
 .../ftvec/selection/ChiSquareUDFTest.java   | 80 
 .../tools/array/SelectKBeatUDFTest.java | 65 
 .../tools/matrix/TransposeAndDotUDAFTest.java   | 58 ++
 4 files changed, 204 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5088ef36/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
--
diff --git a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java 
b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
index 9d68f93..9df9305 100644
--- a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
+++ b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
@@ -70,7 +70,7 @@ public final class TransposeAndDotUDAF extends 
AbstractGenericUDAFResolver {
 return new TransposeAndDotUDAFEvaluator();
 }
 
-private static final class TransposeAndDotUDAFEvaluator extends 
GenericUDAFEvaluator {
+static final class TransposeAndDotUDAFEvaluator extends 
GenericUDAFEvaluator {
 // PARTIAL1 and COMPLETE
 private ListObjectInspector matrix0RowOI;
 private PrimitiveObjectInspector matrix0ElOI;

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/5088ef36/core/src/test/java/hivemall/ftvec/selection/ChiSquareUDFTest.java
--
diff --git a/core/src/test/java/hivemall/ftvec/selection/ChiSquareUDFTest.java 
b/core/src/test/java/hivemall/ftvec/selection/ChiSquareUDFTest.java
new file mode 100644
index 0000000..38f7f57
--- /dev/null
+++ b/core/src/test/java/hivemall/ftvec/selection/ChiSquareUDFTest.java
@@ -0,0 +1,80 @@
+/*
+ * Hivemall: Hive scalable Machine Learning Library
+ *
+ * Copyright (C) 2016 Makoto YUI
+ * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science 
and Technology (AIST)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package hivemall.ftvec.selection;
+
+import hivemall.utils.hadoop.WritableUtils;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class ChiSquareUDFTest {
+
+@Test
+public void test() throws Exception {
+// this test is based on iris data set
+final ChiSquareUDF chi2 = new ChiSquareUDF();
+final List<List<DoubleWritable>> observed = new 
ArrayList<List<DoubleWritable>>();
+final List<List<DoubleWritable>> expected = new 
ArrayList<List<DoubleWritable>>();
+final GenericUDF.DeferredObject[] dObjs = new 
GenericUDF.DeferredObject[] {
+new GenericUDF.DeferredJavaObject(observed),
+new GenericUDF.DeferredJavaObject(expected)};
+
+final double[][] matrix0 = new double[][] {
+{250.28, 170.93, 73.2, 
12.196},
+{296.8, 138.53, 212.97, 66.3},
+{329.3, 148.7, 277.57, 
101.28}};
+final double[][] matrix1 = new double[][] {
+{292.1666753739119, 152.7455081467, 187.9893418327, 
59.9511948589},
+{292.1666753739119, 152.7455081467, 187.9893418327, 
59.9511948589},
+{292.1666753739119, 152.7455081467, 187.9893418327, 
59.9511948589}};

[16/50] [abbrv] incubator-hivemall git commit: standardize to chi2

2016-12-01 Thread myui
standardize to chi2



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/6dc23449
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/6dc23449
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/6dc23449

Branch: refs/heads/JIRA-22/pr-385
Commit: 6dc234490dc25f563b22e5659c378e6ebcf8dcdb
Parents: 89c81aa
Author: amaya 
Authored: Wed Sep 21 11:41:59 2016 +0900
Committer: amaya 
Committed: Wed Sep 21 13:35:23 2016 +0900

--
 resources/ddl/define-all-as-permanent.hive | 4 ++--
 resources/ddl/define-all.hive  | 4 ++--
 resources/ddl/define-all.spark | 4 ++--
 resources/ddl/define-udfs.td.hql   | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/6dc23449/resources/ddl/define-all-as-permanent.hive
--
diff --git a/resources/ddl/define-all-as-permanent.hive 
b/resources/ddl/define-all-as-permanent.hive
index adf6a14..b515b24 100644
--- a/resources/ddl/define-all-as-permanent.hive
+++ b/resources/ddl/define-all-as-permanent.hive
@@ -206,8 +206,8 @@ CREATE FUNCTION l2_normalize as 
'hivemall.ftvec.scaling.L2NormalizationUDF' USIN
 -- selection functions --
 -
 
-DROP FUNCTION IF EXISTS chi_square;
-CREATE FUNCTION chi_square as 'hivemall.ftvec.selection.ChiSquareUDF' USING 
JAR '${hivemall_jar}';
+DROP FUNCTION IF EXISTS chi2;
+CREATE FUNCTION chi2 as 'hivemall.ftvec.selection.ChiSquareUDF' USING JAR 
'${hivemall_jar}';
 
 
 -- misc functions --

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/6dc23449/resources/ddl/define-all.hive
--
diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive
index 1586d2e..2124892 100644
--- a/resources/ddl/define-all.hive
+++ b/resources/ddl/define-all.hive
@@ -202,8 +202,8 @@ create temporary function l2_normalize as 
'hivemall.ftvec.scaling.L2Normalizatio
 -- selection functions --
 -
 
-drop temporary function chi_square;
-create temporary function chi_square as 
'hivemall.ftvec.selection.ChiSquareUDF';
+drop temporary function chi2;
+create temporary function chi2 as 'hivemall.ftvec.selection.ChiSquareUDF';
 
 ---
 -- Feature engineering functions --

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/6dc23449/resources/ddl/define-all.spark
--
diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark
index 50d560b..47f0ce5 100644
--- a/resources/ddl/define-all.spark
+++ b/resources/ddl/define-all.spark
@@ -187,8 +187,8 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION normalize AS 
'hivemall.ftvec.scaling.L
  * selection functions
  */
 
-sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS chi_square")
-sqlContext.sql("CREATE TEMPORARY FUNCTION chi_square AS 
'hivemall.ftvec.selection.ChiSquareUDF'")
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS chi2")
+sqlContext.sql("CREATE TEMPORARY FUNCTION chi2 AS 
'hivemall.ftvec.selection.ChiSquareUDF'")
 
 /**
  * misc functions

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/6dc23449/resources/ddl/define-udfs.td.hql
--
diff --git a/resources/ddl/define-udfs.td.hql b/resources/ddl/define-udfs.td.hql
index 601eead..fd7dc1d 100644
--- a/resources/ddl/define-udfs.td.hql
+++ b/resources/ddl/define-udfs.td.hql
@@ -50,7 +50,7 @@ create temporary function powered_features as 
'hivemall.ftvec.pairing.PoweredFea
 create temporary function rescale as 'hivemall.ftvec.scaling.RescaleUDF';
 create temporary function zscore as 'hivemall.ftvec.scaling.ZScoreUDF';
 create temporary function l2_normalize as 
'hivemall.ftvec.scaling.L2NormalizationUDF';
-create temporary function chi_square as 
'hivemall.ftvec.selection.ChiSquareUDF';
+create temporary function chi2 as 'hivemall.ftvec.selection.ChiSquareUDF';
 create temporary function amplify as 'hivemall.ftvec.amplify.AmplifierUDTF';
 create temporary function rand_amplify as 
'hivemall.ftvec.amplify.RandomAmplifierUDTF';
 create temporary function add_bias as 'hivemall.ftvec.AddBiasUDF';



[42/50] [abbrv] incubator-hivemall git commit: Refine access modifiers/calls

2016-12-01 Thread myui
Refine access modifiers/calls



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/ddd8dc2d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/ddd8dc2d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/ddd8dc2d

Branch: refs/heads/JIRA-22/pr-336
Commit: ddd8dc2dbf8222c9d9d84b038dbdcd9aef1f1a87
Parents: 7447dde
Author: amaya 
Authored: Fri Nov 18 04:22:51 2016 +0900
Committer: amaya 
Committed: Fri Nov 18 04:22:51 2016 +0900

--
 .../systemtest/runner/HiveSystemTestRunner.java |  4 +-
 .../systemtest/runner/SystemTestRunner.java | 40 +++-
 .../systemtest/runner/SystemTestTeam.java   |  8 +---
 .../systemtest/runner/TDSystemTestRunner.java   | 24 ++--
 4 files changed, 36 insertions(+), 40 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ddd8dc2d/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java
--
diff --git 
a/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java 
b/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java
index 25a2125..db1edc7 100644
--- 
a/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java
+++ 
b/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java
@@ -101,7 +101,7 @@ public class HiveSystemTestRunner extends SystemTestRunner {
 }
 
 @Override
-protected void finRunner() {
+void finRunner() {
 if (container != null) {
 container.tearDown();
 }
@@ -111,7 +111,7 @@ public class HiveSystemTestRunner extends SystemTestRunner {
 }
 
 @Override
-protected List exec(@Nonnull final RawHQ hq) {
+public List exec(@Nonnull final RawHQ hq) {
 logger.info("executing: `" + hq.query + "`");
 
 return hShell.executeQuery(hq.query);

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ddd8dc2d/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java
--
diff --git 
a/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java 
b/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java
index f16da90..e142174 100644
--- a/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java
+++ b/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestRunner.java
@@ -45,7 +45,6 @@ import javax.annotation.Nullable;
 import java.io.FileInputStream;
 import java.io.InputStream;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -55,9 +54,9 @@ import java.util.Set;
 public abstract class SystemTestRunner extends ExternalResource {
 static final Logger logger = 
LoggerFactory.getLogger(SystemTestRunner.class);
 @Nonnull
-final List classInitHqs;
+private final List classInitHqs;
 @Nonnull
-final Set immutableTables;
+private final Set immutableTables;
 @Nonnull
 final String dbName;
 @Nonnull
@@ -98,7 +97,7 @@ public abstract class SystemTestRunner extends 
ExternalResource {
 @Override
 protected void after() {
 try {
-resetDB(); // clean up database
+cleanDB(); // clean up database
 } catch (Exception ex) {
 throw new QueryExecutionException("Failed to clean up temporary 
database. "
 + ex.getMessage());
@@ -111,16 +110,16 @@ public abstract class SystemTestRunner extends 
ExternalResource {
 
 abstract void finRunner();
 
-public void initBy(@Nonnull final HQBase hq) {
+protected void initBy(@Nonnull final HQBase hq) {
 classInitHqs.add(hq);
 }
 
-public void initBy(@Nonnull final List hqs) {
+protected void initBy(@Nonnull final List hqs) {
 classInitHqs.addAll(hqs);
 }
 
 // fix to temporary database and user-defined init (should be called per 
Test class)
-void prepareDB() throws Exception {
+private void prepareDB() throws Exception {
 createDB(dbName);
 use(dbName);
 for (HQBase q : classInitHqs) {
@@ -136,15 +135,21 @@ public abstract class SystemTestRunner extends 
ExternalResource {
 }
 
 // drop temporary database (should be called per Test class)
-void resetDB() throws Exception {
+private void cleanDB() throws Exception {
 dropDB(dbName);
 }
 
-public final boolean isImmutableTable(final String tableName) {
-return immutableTables.contains(tableName);
+// drop temporary tables (should be called per Test method)
+void resetDB() throws Exception {
+

[37/50] [abbrv] incubator-hivemall git commit: Add exception

2016-12-01 Thread myui
Add exception



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/33eab26f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/33eab26f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/33eab26f

Branch: refs/heads/JIRA-22/pr-336
Commit: 33eab26f383dbdbce00a209e742b611a63d953cf
Parents: ba91267
Author: amaya 
Authored: Thu Nov 17 14:16:14 2016 +0900
Committer: amaya 
Committed: Thu Nov 17 14:16:14 2016 +0900

--
 .../main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java  | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/33eab26f/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java
--
diff --git 
a/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java 
b/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java
index 6b41855..25a2125 100644
--- 
a/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java
+++ 
b/systemtest/src/main/java/hivemall/systemtest/runner/HiveSystemTestRunner.java
@@ -132,6 +132,7 @@ public class HiveSystemTestRunner extends SystemTestRunner {
 hShell.insertInto(dbName, 
hq.tableName).addRowsFromTsv(hq.file).commit();
 break;
 case MSGPACK:
+throw new Exception("MessagePack is not supported in 
HiveSystemTestRunner");
 case UNKNOWN:
 throw new Exception("Input csv or tsv");
 }



[24/50] [abbrv] incubator-hivemall git commit: Rename SSTChangePoint -> SingularSpectrumTransform

2016-12-01 Thread myui
Rename SSTChangePoint -> SingularSpectrumTransform


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/bde06e09
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/bde06e09
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/bde06e09

Branch: refs/heads/JIRA-22/pr-356
Commit: bde06e0952445bf60a9aef4bca182c0afe87e250
Parents: 3ebd771
Author: Takuya Kitazawa 
Authored: Tue Sep 27 14:06:20 2016 +0900
Committer: Takuya Kitazawa 
Committed: Tue Sep 27 14:06:20 2016 +0900

--
 .../java/hivemall/anomaly/SSTChangePoint.java   | 118 ---
 .../hivemall/anomaly/SSTChangePointUDF.java | 197 ---
 .../anomaly/SingularSpectrumTransform.java  | 118 +++
 .../anomaly/SingularSpectrumTransformUDF.java   | 197 +++
 .../hivemall/anomaly/SSTChangePointTest.java| 111 ---
 .../anomaly/SingularSpectrumTransformTest.java  | 111 +++
 6 files changed, 426 insertions(+), 426 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bde06e09/core/src/main/java/hivemall/anomaly/SSTChangePoint.java
--
diff --git a/core/src/main/java/hivemall/anomaly/SSTChangePoint.java 
b/core/src/main/java/hivemall/anomaly/SSTChangePoint.java
deleted file mode 100644
index e693bd4..0000000
--- a/core/src/main/java/hivemall/anomaly/SSTChangePoint.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Hivemall: Hive scalable Machine Learning Library
- *
- * Copyright (C) 2015 Makoto YUI
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package hivemall.anomaly;
-
-import hivemall.anomaly.SSTChangePointUDF.SSTChangePointInterface;
-import hivemall.anomaly.SSTChangePointUDF.Parameters;
-import hivemall.utils.collections.DoubleRingBuffer;
-import org.apache.commons.math3.linear.MatrixUtils;
-import org.apache.commons.math3.linear.RealMatrix;
-import org.apache.commons.math3.linear.SingularValueDecomposition;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-
-import java.util.Arrays;
-
-import javax.annotation.Nonnull;
-
-final class SSTChangePoint implements SSTChangePointInterface {
-
-@Nonnull
-private final PrimitiveObjectInspector oi;
-
-@Nonnull
-private final int window;
-@Nonnull
-private final int nPastWindow;
-@Nonnull
-private final int nCurrentWindow;
-@Nonnull
-private final int pastSize;
-@Nonnull
-private final int currentSize;
-@Nonnull
-private final int currentOffset;
-@Nonnull
-private final int r;
-
-@Nonnull
-private final DoubleRingBuffer xRing;
-@Nonnull
-private final double[] xSeries;
-
-SSTChangePoint(@Nonnull Parameters params, @Nonnull 
PrimitiveObjectInspector oi) {
-this.oi = oi;
-
-this.window = params.w;
-this.nPastWindow = params.n;
-this.nCurrentWindow = params.m;
-this.pastSize = window + nPastWindow;
-this.currentSize = window + nCurrentWindow;
-this.currentOffset = params.g;
-this.r = params.r;
-
-// (w + n) past samples for the n-past-windows
-// (w + m) current samples for the m-current-windows, starting from 
offset g
-// => need to hold past (w + n + g + w + m) samples from the latest 
sample
-int holdSampleSize = pastSize + currentOffset + currentSize;
-
-this.xRing = new DoubleRingBuffer(holdSampleSize);
-this.xSeries = new double[holdSampleSize];
-}
-
-@Override
-public void update(@Nonnull final Object arg, @Nonnull final double[] 
outScores)
-throws HiveException {
-double x = PrimitiveObjectInspectorUtils.getDouble(arg, oi);
-xRing.add(x).toArray(xSeries, true /* FIFO */);
-
-// need to wait until the buffer is filled
-if (!xRing.isFull()) {
-outScores[0]  = 0.d;
-} else {
-outScores[0] = computeScore();
-}
-}
-
-private double computeScore() {
-// create past trajectory matrix and find its left

[31/50] [abbrv] incubator-hivemall git commit: Support implicit-Krylov-approximation-based efficient SST

2016-12-01 Thread myui
Support implicit-Krylov-approximation-based efficient SST


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/998203d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/998203d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/998203d5

Branch: refs/heads/JIRA-22/pr-356
Commit: 998203d5e8623d6282c2b187df24e4da7d41c16b
Parents: 2bfd127
Author: Takuya Kitazawa 
Authored: Wed Sep 28 19:49:48 2016 +0900
Committer: Takuya Kitazawa 
Committed: Wed Sep 28 19:49:48 2016 +0900

--
 .../anomaly/SingularSpectrumTransform.java  | 103 --
 .../anomaly/SingularSpectrumTransformUDF.java   |  27 +++
 .../java/hivemall/utils/math/MatrixUtils.java   | 203 +++
 .../anomaly/SingularSpectrumTransformTest.java  |  61 --
 .../hivemall/utils/math/MatrixUtilsTest.java|  67 ++
 5 files changed, 434 insertions(+), 27 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/998203d5/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java
--
diff --git a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java 
b/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java
index c964129..f9f6222 100644
--- a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java
+++ b/core/src/main/java/hivemall/anomaly/SingularSpectrumTransform.java
@@ -18,9 +18,11 @@
 package hivemall.anomaly;
 
 import 
hivemall.anomaly.SingularSpectrumTransformUDF.SingularSpectrumTransformInterface;
+import hivemall.anomaly.SingularSpectrumTransformUDF.ScoreFunction;
 import hivemall.anomaly.SingularSpectrumTransformUDF.Parameters;
 import hivemall.utils.collections.DoubleRingBuffer;
-import org.apache.commons.math3.linear.MatrixUtils;
+import hivemall.utils.math.MatrixUtils;
+import org.apache.commons.math3.linear.Array2DRowRealMatrix;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.linear.SingularValueDecomposition;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -28,6 +30,8 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 
 import java.util.Arrays;
+import java.util.TreeMap;
+import java.util.Collections;
 
 import javax.annotation.Nonnull;
 
@@ -37,6 +41,9 @@ final class SingularSpectrumTransform implements 
SingularSpectrumTransformInterf
 private final PrimitiveObjectInspector oi;
 
 @Nonnull
+private final ScoreFunction scoreFunc;
+
+@Nonnull
 private final int window;
 @Nonnull
 private final int nPastWindow;
@@ -50,15 +57,22 @@ final class SingularSpectrumTransform implements 
SingularSpectrumTransformInterf
 private final int currentOffset;
 @Nonnull
 private final int r;
+@Nonnull
+private final int k;
 
 @Nonnull
 private final DoubleRingBuffer xRing;
 @Nonnull
 private final double[] xSeries;
 
+@Nonnull
+private final double[] q;
+
 SingularSpectrumTransform(@Nonnull Parameters params, @Nonnull 
PrimitiveObjectInspector oi) {
 this.oi = oi;
 
+this.scoreFunc = params.scoreFunc;
+
 this.window = params.w;
 this.nPastWindow = params.n;
 this.nCurrentWindow = params.m;
@@ -66,6 +80,7 @@ final class SingularSpectrumTransform implements 
SingularSpectrumTransformInterf
 this.currentSize = window + nCurrentWindow;
 this.currentOffset = params.g;
 this.r = params.r;
+this.k = params.k;
 
 // (w + n) past samples for the n-past-windows
 // (w + m) current samples for the m-current-windows, starting from 
offset g
@@ -74,6 +89,18 @@ final class SingularSpectrumTransform implements 
SingularSpectrumTransformInterf
 
 this.xRing = new DoubleRingBuffer(holdSampleSize);
 this.xSeries = new double[holdSampleSize];
+
+this.q = new double[window];
+double norm = 0.d;
+for (int i = 0; i < window; i++) {
+this.q[i] = Math.random();
+norm += q[i] * q[i];
+}
+norm = Math.sqrt(norm);
+// normalize
+for (int i = 0; i < window; i++) {
+this.q[i] = q[i] / norm;
+}
 }
 
 @Override
@@ -86,25 +113,39 @@ final class SingularSpectrumTransform implements 
SingularSpectrumTransformInterf
 if (!xRing.isFull()) {
 outScores[0]  = 0.d;
 } else {
-outScores[0] = computeScore();
+// create past trajectory matrix and find its left singular vectors
+RealMatrix H = new Array2DRowRealMatrix(new 
double[window][nPastWindow]);
+

[33/50] [abbrv] incubator-hivemall git commit: minor fix

2016-12-01 Thread myui
minor fix



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/8d9f0d4c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/8d9f0d4c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/8d9f0d4c

Branch: refs/heads/JIRA-22/pr-385
Commit: 8d9f0d4c00758324029d342eb4b892e046ca4a49
Parents: 80be81e
Author: amaya 
Authored: Thu Sep 29 11:02:14 2016 +0900
Committer: amaya 
Committed: Thu Sep 29 11:02:14 2016 +0900

--
 .../test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8d9f0d4c/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
--
diff --git 
a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
 
b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 7b62b92..fe73a1b 100644
--- 
a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ 
b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -743,8 +743,8 @@ final class HivemallOpsWithFeatureSuite extends 
HivemallFeatureQueryTest {
 val df0 = Seq((1, Seq(1, 2, 3), Seq(5, 6, 7)), (1, Seq(3, 4, 5), Seq(7, 8, 
9)))
   .toDF("c0", "arg0", "arg1")
 
-df0.groupby($"c0").transpose_and_dot("arg0", "arg1").collect() shouldEqual
-  Seq(Row(1, Seq(Seq(26.0, 30.0, 34.0), Seq(38.0, 44.0, 50.0), Seq(50.0, 
58.0, 66.0))))
+checkAnswer(df0.groupby($"c0").transpose_and_dot("arg0", "arg1"),
+  Seq(Row(1, Seq(Seq(26.0, 30.0, 34.0), Seq(38.0, 44.0, 50.0), Seq(50.0, 
58.0, 66.0)))))
   }
 }
 



[27/50] [abbrv] incubator-hivemall git commit: Add references for the original SST papers

2016-12-01 Thread myui
Add references for the original SST papers


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/2bfd1270
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/2bfd1270
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/2bfd1270

Branch: refs/heads/JIRA-22/pr-356
Commit: 2bfd1270b1e9b79185a41cbe2568f2ce968d4a71
Parents: bde06e0
Author: Takuya Kitazawa 
Authored: Wed Sep 28 11:16:56 2016 +0900
Committer: Takuya Kitazawa 
Committed: Wed Sep 28 11:22:46 2016 +0900

--
 .../hivemall/anomaly/SingularSpectrumTransformUDF.java   | 11 +++
 1 file changed, 11 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/2bfd1270/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java
--
diff --git 
a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java 
b/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java
index 2ec0a91..64b7d20 100644
--- a/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java
+++ b/core/src/main/java/hivemall/anomaly/SingularSpectrumTransformUDF.java
@@ -41,6 +41,17 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 
+/**
+ * Change-point detection based on Singular Spectrum Transformation (SST).
+ *
+ * References:
+ * <ul>
+ * <li>T. Ide and K. Inoue,
+ * "Knowledge Discovery from Heterogeneous Dynamic Systems using Change-Point 
Correlations", SDM'05.</li>
+ * <li>T. Ide and K. Tsuda, "Change-point detection using Krylov subspace 
learning", SDM'07.</li>
+ * </ul>
+ */
+
 @Description(
 name = "sst",
 value = "_FUNC_(double|array x [, const string options])"



[29/50] [abbrv] incubator-hivemall git commit: refine tests

2016-12-01 Thread myui
refine tests



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/8e2842cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/8e2842cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/8e2842cf

Branch: refs/heads/JIRA-22/pr-385
Commit: 8e2842cf8c272642feaa76bf95e8fa463b0322dc
Parents: 1347de9
Author: amaya 
Authored: Wed Sep 28 14:24:19 2016 +0900
Committer: amaya 
Committed: Wed Sep 28 14:24:19 2016 +0900

--
 .../ftvec/selection/ChiSquareUDFTest.java   | 12 ++--
 .../selection/SignalNoiseRatioUDAFTest.java | 71 
 2 files changed, 64 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8e2842cf/core/src/test/java/hivemall/ftvec/selection/ChiSquareUDFTest.java
--
diff --git a/core/src/test/java/hivemall/ftvec/selection/ChiSquareUDFTest.java 
b/core/src/test/java/hivemall/ftvec/selection/ChiSquareUDFTest.java
index 38f7f57..d5880b8 100644
--- a/core/src/test/java/hivemall/ftvec/selection/ChiSquareUDFTest.java
+++ b/core/src/test/java/hivemall/ftvec/selection/ChiSquareUDFTest.java
@@ -69,12 +69,12 @@ public class ChiSquareUDFTest {
 result1[i] = Double.valueOf(((List) result[1]).get(i).toString());
 }
 
-final double[] answer0 = new double[] {10.817820878493995, 
3.5944990176817315,
-116.16984746363957, 67.24482558215503};
-final double[] answer1 = new double[] {0.004476514990225833, 
0.16575416718561453, 0.d,
-2.55351295663786e-15};
+// compare with results by scikit-learn
+final double[] answer0 = new double[] {10.81782088, 3.59449902, 
116.16984746, 67.24482759};
+final double[] answer1 = new double[] {4.47651499e-03, 1.65754167e-01, 
5.94344354e-26,
+2.50017968e-15};
 
-Assert.assertArrayEquals(answer0, result0, 0.d);
-Assert.assertArrayEquals(answer1, result1, 0.d);
+Assert.assertArrayEquals(answer0, result0, 1e-5);
+Assert.assertArrayEquals(answer1, result1, 1e-5);
 }
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8e2842cf/core/src/test/java/hivemall/ftvec/selection/SignalNoiseRatioUDAFTest.java
--
diff --git 
a/core/src/test/java/hivemall/ftvec/selection/SignalNoiseRatioUDAFTest.java 
b/core/src/test/java/hivemall/ftvec/selection/SignalNoiseRatioUDAFTest.java
index 4655545..56a01d0 100644
--- a/core/src/test/java/hivemall/ftvec/selection/SignalNoiseRatioUDAFTest.java
+++ b/core/src/test/java/hivemall/ftvec/selection/SignalNoiseRatioUDAFTest.java
@@ -40,7 +40,8 @@ public class SignalNoiseRatioUDAFTest {
 public ExpectedException expectedException = ExpectedException.none();
 
 @Test
-public void test() throws Exception {
+public void snrBinaryClass() throws Exception {
+// this test is based on *subset* of iris data set
 final SignalNoiseRatioUDAF snr = new SignalNoiseRatioUDAF();
 final ObjectInspector[] OIs = new ObjectInspector[] {
 
ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector),
@@ -51,20 +52,62 @@ public class SignalNoiseRatioUDAFTest {
 final 
SignalNoiseRatioUDAF.SignalNoiseRatioUDAFEvaluator.SignalNoiseRatioAggregationBuffer
 agg = 
(SignalNoiseRatioUDAF.SignalNoiseRatioUDAFEvaluator.SignalNoiseRatioAggregationBuffer)
 evaluator.getNewAggregationBuffer();
 evaluator.reset(agg);
 
-final double[][] featuress = new double[][] { {5.1, 3.5, 1.4, 0.2}, 
{4.9, 3.d, 1.4, 0.2},
+final double[][] features = new double[][] { {5.1, 3.5, 1.4, 0.2}, 
{4.9, 3.d, 1.4, 0.2},
+{4.7, 3.2, 1.3, 0.2}, {7.d, 3.2, 4.7, 1.4}, {6.4, 3.2, 4.5, 
1.5},
+{6.9, 3.1, 4.9, 1.5}};
+
+final int[][] labels = new int[][] { {1, 0}, {1, 0}, {1, 0}, {0, 1}, 
{0, 1}, {0, 1}};
+
+for (int i = 0; i < features.length; i++) {
+final List<IntWritable> labelList = new ArrayList<IntWritable>();
+for (int label : labels[i]) {
+labelList.add(new IntWritable(label));
+}
+evaluator.iterate(agg, new Object[] 
{WritableUtils.toWritableList(features[i]),
+labelList});
+}
+
+@SuppressWarnings("unchecked")
+final List<DoubleWritable> resultObj = (ArrayList<DoubleWritable>) 
evaluator.terminate(agg);
+final int size = resultObj.size();
+final double[] result = new double[size];
+for (int i = 0; i < size; i++) {
+result[i] = resultObj.get(i).get();
+}
+
+// compare with result by numpy
+final double[] answer

[38/50] [abbrv] incubator-hivemall git commit: Make dir name static

2016-12-01 Thread myui
Make dir name static



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/1f3df54c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/1f3df54c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/1f3df54c

Branch: refs/heads/JIRA-22/pr-336
Commit: 1f3df54c0183a61390f58b94f58c12e531754a09
Parents: 33eab26
Author: amaya 
Authored: Fri Nov 18 01:57:31 2016 +0900
Committer: amaya 
Committed: Fri Nov 18 01:57:31 2016 +0900

--
 .../hivemall/systemtest/runner/SystemTestCommonInfo.java  | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1f3df54c/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestCommonInfo.java
--
diff --git 
a/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestCommonInfo.java 
b/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestCommonInfo.java
index 60292fa..82b433f 100644
--- 
a/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestCommonInfo.java
+++ 
b/systemtest/src/main/java/hivemall/systemtest/runner/SystemTestCommonInfo.java
@@ -21,6 +21,10 @@ package hivemall.systemtest.runner;
 import javax.annotation.Nonnull;
 
 public class SystemTestCommonInfo {
+private static final String CASE = "case";
+private static final String ANSWER = "answer";
+private static final String INIT = "init";
+
 @Nonnull
 public final String baseDir;
 @Nonnull
@@ -34,9 +38,9 @@ public class SystemTestCommonInfo {
 
 public SystemTestCommonInfo(@Nonnull final Class clazz) {
 baseDir = clazz.getName().replace(".", "/");
-caseDir = baseDir + "/case/";
-answerDir = baseDir + "/answer/";
-initDir = baseDir + "/init/";
+caseDir = baseDir + "/" + CASE + "/";
+answerDir = baseDir + "/" + ANSWER + "/";
+initDir = baseDir + "/" + INIT + "/";
 dbName = clazz.getName().replace(".", "_").toLowerCase();
 }
 }



[35/50] [abbrv] incubator-hivemall git commit: Update license headers

2016-12-01 Thread myui
Update license headers



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/43ca0c86
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/43ca0c86
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/43ca0c86

Branch: refs/heads/JIRA-22/pr-336
Commit: 43ca0c86936f3ccc7f825db3c4f4ecaa48087917
Parents: faebaf9
Author: amaya 
Authored: Wed Nov 16 15:23:49 2016 +0900
Committer: amaya 
Committed: Wed Nov 16 15:23:49 2016 +0900

--
 systemtest/README.md| 18 +
 systemtest/pom.xml  | 17 +++-
 .../java/com/klarna/hiverunner/Extractor.java   | 28 ++--
 .../hivemall/systemtest/MsgpackConverter.java   | 28 ++--
 .../exception/QueryExecutionException.java  | 28 ++--
 .../systemtest/model/CreateTableHQ.java | 28 ++--
 .../hivemall/systemtest/model/DropTableHQ.java  | 28 ++--
 .../main/java/hivemall/systemtest/model/HQ.java | 28 ++--
 .../java/hivemall/systemtest/model/HQBase.java  | 28 ++--
 .../hivemall/systemtest/model/InsertHQ.java | 28 ++--
 .../java/hivemall/systemtest/model/RawHQ.java   | 28 ++--
 .../java/hivemall/systemtest/model/TableHQ.java | 28 ++--
 .../hivemall/systemtest/model/TableListHQ.java  | 28 ++--
 .../model/UploadFileAsNewTableHQ.java   | 28 ++--
 .../hivemall/systemtest/model/UploadFileHQ.java | 28 ++--
 .../model/UploadFileToExistingHQ.java   | 28 ++--
 .../model/lazy/LazyMatchingResource.java| 28 ++--
 .../systemtest/runner/HiveSystemTestRunner.java | 28 ++--
 .../systemtest/runner/SystemTestCommonInfo.java | 28 ++--
 .../systemtest/runner/SystemTestRunner.java | 28 ++--
 .../systemtest/runner/SystemTestTeam.java   | 28 ++--
 .../systemtest/runner/TDSystemTestRunner.java   | 28 ++--
 .../main/java/hivemall/systemtest/utils/IO.java | 28 ++--
 23 files changed, 328 insertions(+), 295 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/43ca0c86/systemtest/README.md
--
diff --git a/systemtest/README.md b/systemtest/README.md
index 9d1442a..4fca0c3 100644
--- a/systemtest/README.md
+++ b/systemtest/README.md
@@ -1,3 +1,21 @@
+
 ## Usage
 
 ### Initialization

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/43ca0c86/systemtest/pom.xml
--
diff --git a/systemtest/pom.xml b/systemtest/pom.xml
index e59d2ce..e7345af 100644
--- a/systemtest/pom.xml
+++ b/systemtest/pom.xml
@@ -1,4 +1,19 @@
-
+
 <project xmlns="http://maven.apache.org/POM/4.0.0"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/43ca0c86/systemtest/src/main/java/com/klarna/hiverunner/Extractor.java
--
diff --git a/systemtest/src/main/java/com/klarna/hiverunner/Extractor.java 
b/systemtest/src/main/java/com/klarna/hiverunner/Extractor.java
index 99720f0..f7f372f 100644
--- a/systemtest/src/main/java/com/klarna/hiverunner/Extractor.java
+++ b/systemtest/src/main/java/com/klarna/hiverunner/Extractor.java
@@ -1,20 +1,20 @@
 /*
- * Hivemall: Hive scalable Machine Learning Library
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- * Copyright (C) 2016 Makoto YUI
- * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science 
and Technology (AIST)
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, eith

[20/50] [abbrv] incubator-hivemall git commit: mod chi2 function name

2016-12-01 Thread myui
mod chi2 function name



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/a882c5f9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/a882c5f9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/a882c5f9

Branch: refs/heads/JIRA-22/pr-385
Commit: a882c5f9f8067b911254dfc43d268de06a5490f9
Parents: b8cf396
Author: amaya 
Authored: Wed Sep 21 16:00:36 2016 +0900
Committer: amaya 
Committed: Wed Sep 21 16:23:47 2016 +0900

--
 core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java | 2 +-
 core/src/main/java/hivemall/utils/math/StatsUtils.java| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a882c5f9/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
--
diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
index 70f0316..1583959 100644
--- a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
+++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
@@ -129,7 +129,7 @@ public class ChiSquareUDF extends GenericUDF {
 }
 }
 
-final Map.Entry<double[], double[]> chi2 = 
StatsUtils.chiSquares(observed, expected);
+final Map.Entry<double[], double[]> chi2 = 
StatsUtils.chiSquare(observed, expected);
 
 final Object[] result = new Object[2];
 result[0] = WritableUtils.toWritableList(chi2.getKey());

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a882c5f9/core/src/main/java/hivemall/utils/math/StatsUtils.java
--
diff --git a/core/src/main/java/hivemall/utils/math/StatsUtils.java 
b/core/src/main/java/hivemall/utils/math/StatsUtils.java
index e255b84..14adbff 100644
--- a/core/src/main/java/hivemall/utils/math/StatsUtils.java
+++ b/core/src/main/java/hivemall/utils/math/StatsUtils.java
@@ -262,7 +262,7 @@ public final class StatsUtils {
  * @param expecteds means positive matrix
  * @return (chi2 value[], p value[])
  */
-public static Map.Entry<double[], double[]> chiSquares(@Nonnull final 
double[][] observeds,
+public static Map.Entry<double[], double[]> chiSquare(@Nonnull final 
double[][] observeds,
 @Nonnull final double[][] expecteds) {
 Preconditions.checkArgument(observeds.length == expecteds.length);
 



[32/50] [abbrv] incubator-hivemall git commit: minor fix

2016-12-01 Thread myui
minor fix



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/80be81ec
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/80be81ec
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/80be81ec

Branch: refs/heads/JIRA-22/pr-385
Commit: 80be81ecf92cd4675dcdfaa5f456d84d484d6c44
Parents: 4cfa4e5
Author: amaya 
Authored: Wed Sep 28 20:01:08 2016 +0900
Committer: amaya 
Committed: Wed Sep 28 20:01:08 2016 +0900

--
 .../main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java  | 2 +-
 .../test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/80be81ec/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
--
diff --git 
a/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java 
b/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
index 507aefa..96fdc5b 100644
--- a/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
+++ b/core/src/main/java/hivemall/ftvec/selection/SignalNoiseRatioUDAF.java
@@ -335,7 +335,7 @@ public class SignalNoiseRatioUDAF extends 
AbstractGenericUDAFResolver {
 final double snr = Math.abs(myAgg.meanss[j][i] - 
myAgg.meanss[k][i])
 / (sds[j] + sds[k]);
 // if `NaN`(when diff between means and both sds are 
zero, IOW, all related values are equal),
-// regard feature `i` as meaningless between class `j` 
and `k` and skip
+// regard feature `i` as meaningless between class `j` 
and `k`, skip
 if (!Double.isNaN(snr)) {
 result[i] += snr; // accept `Infinity`
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/80be81ec/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
--
diff --git 
a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
 
b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 2e18280..7b62b92 100644
--- 
a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ 
b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -740,7 +740,8 @@ final class HivemallOpsWithFeatureSuite extends 
HivemallFeatureQueryTest {
 
 // | 1  2  3 |T| 5  6  7 |
 // | 3  4  5 |  *  | 7  8  9 |
-val df0 = Seq((1, Seq(1, 2, 3), Seq(5, 6, 7)), (1, Seq(3, 4, 5), Seq(7, 8, 
9))).toDF.as("c0", "arg0", "arg1")
+val df0 = Seq((1, Seq(1, 2, 3), Seq(5, 6, 7)), (1, Seq(3, 4, 5), Seq(7, 8, 
9)))
+  .toDF("c0", "arg0", "arg1")
 
 df0.groupby($"c0").transpose_and_dot("arg0", "arg1").collect() shouldEqual
   Seq(Row(1, Seq(Seq(26.0, 30.0, 34.0), Seq(38.0, 44.0, 50.0), Seq(50.0, 
58.0, 66.0))))



[34/50] [abbrv] incubator-hivemall git commit: change method of testing for spark

2016-12-01 Thread myui
change method of testing for spark



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/ce4a4898
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/ce4a4898
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/ce4a4898

Branch: refs/heads/JIRA-22/pr-385
Commit: ce4a48980e33b9f16c74a62fcea6878f28b9c08b
Parents: 8d9f0d4
Author: amaya 
Authored: Fri Sep 30 17:05:20 2016 +0900
Committer: amaya 
Committed: Fri Sep 30 17:05:20 2016 +0900

--
 .../spark/sql/hive/HivemallOpsSuite.scala   | 23 ++--
 .../spark/sql/hive/HivemallOpsSuite.scala   | 17 ++-
 2 files changed, 18 insertions(+), 22 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ce4a4898/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
--
diff --git 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index cce22ce..c7016c0 100644
--- 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -24,7 +24,6 @@ import org.apache.spark.sql.{Column, Row}
 import org.apache.spark.test.HivemallQueryTest
 import org.apache.spark.test.TestDoubleWrapper._
 import org.apache.spark.test.TestUtils._
-import org.scalatest.Matchers._
 
 final class HivemallOpsSuite extends HivemallQueryTest {
 
@@ -189,7 +188,6 @@ final class HivemallOpsSuite extends HivemallQueryTest {
 
   test("ftvec.selection - chi2") {
 import hiveContext.implicits._
-implicit val doubleEquality = 
org.scalactic.TolerantNumerics.tolerantDoubleEquality(1e-5)
 
 // see also hivemall.ftvec.selection.ChiSquareUDFTest
 val df = Seq(
@@ -204,17 +202,17 @@ final class HivemallOpsSuite extends HivemallQueryTest {
   .toDF("arg0", "arg1")
 
 val result = df.select(chi2(df("arg0"), df("arg1"))).collect
-result should have length 1
+assert(result.length == 1)
 val chi2Val = result.head.getAs[Row](0).getAs[Seq[Double]](0)
 val pVal = result.head.getAs[Row](0).getAs[Seq[Double]](1)
 
 (chi2Val, Seq(10.81782088, 3.59449902, 116.16984746, 67.24482759))
   .zipped
-  .foreach((actual, expected) => actual shouldEqual expected)
+  .foreach((actual, expected) => assert(actual ~== expected))
 
 (pVal, Seq(4.47651499e-03, 1.65754167e-01, 5.94344354e-26, 2.50017968e-15))
   .zipped
-  .foreach((actual, expected) => actual shouldEqual expected)
+  .foreach((actual, expected) => assert(actual ~== expected))
   }
 
   test("ftvec.conv - quantify") {
@@ -370,8 +368,9 @@ final class HivemallOpsSuite extends HivemallQueryTest {
 val data = Seq(Seq(0, 1, 3), Seq(2, 4, 1), Seq(5, 4, 9))
 val df = data.map(d => (d, Seq(3, 1, 2), 2)).toDF("features", 
"importance_list", "k")
 
-df.select(select_k_best(df("features"), df("importance_list"), 
df("k"))).collect shouldEqual
-  data.map(s => Row(Seq(s(0).toDouble, s(2).toDouble)))
+// if use checkAnswer here, fail for some reason, maybe type? but it's 
okay on spark-2.0
+assert(df.select(select_k_best(df("features"), df("importance_list"), 
df("k"))).collect ===
+  data.map(s => Row(Seq(s(0).toDouble, s(2).toDouble))))
   }
 
   test("misc - sigmoid") {
@@ -573,7 +572,6 @@ final class HivemallOpsSuite extends HivemallQueryTest {
 
   test("user-defined aggregators for ftvec.selection") {
 import hiveContext.implicits._
-implicit val doubleEquality = 
org.scalactic.TolerantNumerics.tolerantDoubleEquality(1e-5)
 
 // see also hivemall.ftvec.selection.SignalNoiseRatioUDAFTest
 // binary class
@@ -595,7 +593,7 @@ final class HivemallOpsSuite extends HivemallQueryTest {
 val row0 = df0.groupby($"c0").snr("arg0", "arg1").collect
 (row0(0).getAs[Seq[Double]](1), Seq(4.38425236, 0.26390002, 15.83984511, 
26.87005769))
   .zipped
-  .foreach((actual, expected) => actual shouldEqual expected)
+  .foreach((actual, expected) => assert(actual ~== expected))
 
 // multiple class
 // +-+---+
@@ -616,7 +614,7 @@ final class HivemallOpsSuite extends HivemallQueryTest {
 val row1 = df1.groupby($"c0").snr("arg0", "arg1").collect
 (row1(0).getAs[Seq[Double]](1), Seq(8.43181818, 1.32121212, 42.94949495, 
33.80952381))
   .zipped
-  .foreach((actual, expected) => actual shouldEqual expected)
+  .foreach((actual, expected) => assert(actual ~== expected))
   }
 
   test("user-defined aggregators for tools.matrix") {
@@ -627,7 +625,8 @@ final class HivemallOpsSuite extends HivemallQ

[41/50] [abbrv] incubator-hivemall git commit: Mod README

2016-12-01 Thread myui
Mod README



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/7447dde6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/7447dde6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/7447dde6

Branch: refs/heads/JIRA-22/pr-336
Commit: 7447dde61f3a9cb8e3ba5ab278a260d0a0615524
Parents: 144cb50
Author: amaya 
Authored: Fri Nov 18 03:23:46 2016 +0900
Committer: amaya 
Committed: Fri Nov 18 03:23:46 2016 +0900

--
 systemtest/README.md | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7447dde6/systemtest/README.md
--
diff --git a/systemtest/README.md b/systemtest/README.md
index 2805165..2b1167e 100644
--- a/systemtest/README.md
+++ b/systemtest/README.md
@@ -195,8 +195,9 @@ pink255 192 203
 ```sql
 -- write your hive queries
 -- comments like this and multiple queries in one row are allowed
-SELECT blue FROM color WHERE name = 'lavender';SELECT green FROM color WHERE 
name LIKE 'orange%' 
-SELECT name FROM color WHERE blue = 255
+SELECT blue FROM color WHERE name = 'lavender';
+SELECT green FROM color WHERE name LIKE 'orange%';
+SELECT name FROM color WHERE blue = 255;
 ```
 
 * `systemtest/src/test/resources/hivemall/QuickExample/answer/test3` 
(`systemtest/src/test/resources/${path/to/package}/${className}/answer/${fileName}`)
@@ -205,6 +206,6 @@ tsv format is required
 
 ```tsv
 250
-16569
-azurebluemagenta
+16569
+azure  bluemagenta
 ```



[10/50] [abbrv] incubator-hivemall git commit: add license and format

2016-12-01 Thread myui
add license and format



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/ad81b3aa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/ad81b3aa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/ad81b3aa

Branch: refs/heads/JIRA-22/pr-385
Commit: ad81b3aa5a0bbb7c248d127ba44608578c01ae00
Parents: 1ab9b09
Author: amaya 
Authored: Tue Sep 20 17:05:55 2016 +0900
Committer: amaya 
Committed: Tue Sep 20 18:37:51 2016 +0900

--
 .../hivemall/ftvec/selection/ChiSquareUDF.java  | 92 
 .../tools/array/ArrayTopKIndicesUDF.java| 29 --
 .../tools/array/SubarrayByIndicesUDF.java   | 36 ++--
 .../tools/matrix/TransposeAndDotUDAF.java   | 64 +-
 .../java/hivemall/utils/hadoop/HiveUtils.java   | 10 ++-
 .../java/hivemall/utils/math/StatsUtils.java| 29 +++---
 6 files changed, 171 insertions(+), 89 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad81b3aa/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
--
diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
index 1954e33..e2b7494 100644
--- a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
+++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
@@ -1,3 +1,21 @@
+/*
+ * Hivemall: Hive scalable Machine Learning Library
+ *
+ * Copyright (C) 2016 Makoto YUI
+ * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science 
and Technology (AIST)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package hivemall.ftvec.selection;
 
 import hivemall.utils.hadoop.HiveUtils;
@@ -10,24 +28,20 @@ import 
org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 
-import javax.annotation.Nonnull;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
 @Description(name = "chi2",
-value = "_FUNC_(array<array<number>> observed, array<array<number>> 
expected)" +
-" - Returns chi2_val and p_val of each columns as 
<array<double>, array<double>>")
+value = "_FUNC_(array<array<number>> observed, array<array<number>> 
expected)"
++ " - Returns chi2_val and p_val of each columns as 
<array<double>, array<double>>")
 public class ChiSquareUDF extends GenericUDF {
 private ListObjectInspector observedOI;
 private ListObjectInspector observedRowOI;
@@ -42,31 +56,31 @@ public class ChiSquareUDF extends GenericUDF {
 throw new UDFArgumentLengthException("Specify two arguments.");
 }
 
-if (!HiveUtils.isNumberListListOI(OIs[0])){
-throw new UDFArgumentTypeException(0, "Only array<array<number>> 
type argument is acceptable but "
-+ OIs[0].getTypeName() + " was passed as `observed`");
+if (!HiveUtils.isNumberListListOI(OIs[0])) {
+throw new UDFArgumentTypeException(0,
+"Only array<array<number>> type argument is acceptable but " + 
OIs[0].getTypeName()
++ " was passed as `observed`");
 }
 
-if (!HiveUtils.isNumberListListOI(OIs[1])){
-throw new UDFArgumentTypeException(1, "Only array<array<number>> 
type argument is acceptable but "
-+ OIs[1].getTypeName() + " was passed as `expected`");
+if (!HiveUtils.isNumberListListOI(OIs[1])) {
+throw new UDFArgumentTypeException(1,
+"Only array> typ

[03/50] [abbrv] incubator-hivemall git commit: add transpose_and_dot

2016-12-01 Thread myui
add transpose_and_dot



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/6f9b4fa0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/6f9b4fa0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/6f9b4fa0

Branch: refs/heads/JIRA-22/pr-385
Commit: 6f9b4fa0acebf604882240ccd5507d9df45bab2d
Parents: 56adf2d
Author: amaya 
Authored: Fri Sep 16 15:52:54 2016 +0900
Committer: amaya 
Committed: Fri Sep 16 15:52:54 2016 +0900

--
 .../tools/matrix/TransposeAndDotUDAF.java   | 191 +++
 1 file changed, 191 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/6f9b4fa0/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
--
diff --git a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java 
b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
new file mode 100644
index 000..4fa5ce4
--- /dev/null
+++ b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
@@ -0,0 +1,191 @@
+package hivemall.tools.matrix;
+
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.hadoop.WritableUtils;
+import hivemall.utils.lang.Preconditions;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+@Description(name = "transpose_and_dot",
+value = "_FUNC_(array<number> matrix0_row, array<number> matrix1_row)" 
+
+" - Returns dot(matrix0.T, matrix1) as array<array<double>>, 
shape = (matrix0.#cols, matrix1.#cols)")
+public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver {
+@Override
+public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) 
throws SemanticException {
+ObjectInspector[] OIs = info.getParameterObjectInspectors();
+
+if (OIs.length != 2) {
+throw new UDFArgumentLengthException("Specify two arguments.");
+}
+
+if (!HiveUtils.isNumberListOI(OIs[0])) {
+throw new UDFArgumentTypeException(0, "Only array<number> type 
argument is acceptable but "
++ OIs[0].getTypeName() + " was passed as `matrix0_row`");
+}
+
+if (!HiveUtils.isNumberListOI(OIs[1])) {
+throw new UDFArgumentTypeException(1, "Only array<number> type 
argument is acceptable but "
++ OIs[1].getTypeName() + " was passed as `matrix1_row`");
+}
+
+return new TransposeAndDotUDAFEvaluator();
+}
+
+private static final class TransposeAndDotUDAFEvaluator extends 
GenericUDAFEvaluator {
+// PARTIAL1 and COMPLETE
+private ListObjectInspector matrix0RowOI;
+private PrimitiveObjectInspector matrix0ElOI;
+private ListObjectInspector matrix1RowOI;
+private PrimitiveObjectInspector matrix1ElOI;
+
+// PARTIAL2 and FINAL
+private ListObjectInspector aggMatrixOI;
+private ListObjectInspector aggMatrixRowOI;
+private DoubleObjectInspector aggMatrixElOI;
+
+private double[] matrix0Row;
+private double[] matrix1Row;
+
+@AggregationType(estimable = true)
+static class TransposeAndDotAggregationBuffer extends 
AbstractAggregationBuffer {
+double[][] aggMatrix;
+
+@Override
+public int estimate() {
+return aggMatrix != null
+? aggMatrix.length * aggMatrix[0].length * 8
+: 0;
+}
+
+public void init(int n, int m) {
+aggMatrix = new double[n][m];
+}
+
+public void reset() {
+if (aggMatrix != null) {
+for (double[] row : 

[04/50] [abbrv] incubator-hivemall git commit: add chi2 and chi2_test

2016-12-01 Thread myui
add chi2 and chi2_test



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/d3009be5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/d3009be5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/d3009be5

Branch: refs/heads/JIRA-22/pr-385
Commit: d3009be59bcf314b373038e3db8903a041396931
Parents: 6f9b4fa
Author: amaya 
Authored: Fri Sep 16 16:00:58 2016 +0900
Committer: amaya 
Committed: Fri Sep 16 16:00:58 2016 +0900

--
 .../ftvec/selection/ChiSquareTestUDF.java   | 21 +
 .../hivemall/ftvec/selection/ChiSquareUDF.java  | 21 +
 .../ftvec/selection/DissociationDegreeUDF.java  | 88 
 .../java/hivemall/utils/math/StatsUtils.java| 49 +++
 4 files changed, 179 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/d3009be5/core/src/main/java/hivemall/ftvec/selection/ChiSquareTestUDF.java
--
diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareTestUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/ChiSquareTestUDF.java
new file mode 100644
index 000..d367085
--- /dev/null
+++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareTestUDF.java
@@ -0,0 +1,21 @@
+package hivemall.ftvec.selection;
+
+import hivemall.utils.math.StatsUtils;
+import org.apache.hadoop.hive.ql.exec.Description;
+
+import javax.annotation.Nonnull;
+
+@Description(name = "chi2_test",
+value = "_FUNC_(array expected, array observed) - 
Returns p-value as double")
+public class ChiSquareTestUDF extends DissociationDegreeUDF {
+@Override
+double calcDissociation(@Nonnull final double[] expected,@Nonnull final  
double[] observed) {
+return StatsUtils.chiSquareTest(expected, observed);
+}
+
+@Override
+@Nonnull
+String getFuncName() {
+return "chi2_test";
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/d3009be5/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
--
diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
new file mode 100644
index 000..937b1bd
--- /dev/null
+++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
@@ -0,0 +1,21 @@
+package hivemall.ftvec.selection;
+
+import hivemall.utils.math.StatsUtils;
+import org.apache.hadoop.hive.ql.exec.Description;
+
+import javax.annotation.Nonnull;
+
+@Description(name = "chi2",
+value = "_FUNC_(array expected, array observed) - 
Returns chi2-value as double")
+public class ChiSquareUDF extends DissociationDegreeUDF {
+@Override
+double calcDissociation(@Nonnull final double[] expected,@Nonnull final  
double[] observed) {
+return StatsUtils.chiSquare(expected, observed);
+}
+
+@Override
+@Nonnull
+String getFuncName() {
+return "chi2";
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/d3009be5/core/src/main/java/hivemall/ftvec/selection/DissociationDegreeUDF.java
--
diff --git 
a/core/src/main/java/hivemall/ftvec/selection/DissociationDegreeUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/DissociationDegreeUDF.java
new file mode 100644
index 000..0acae82
--- /dev/null
+++ b/core/src/main/java/hivemall/ftvec/selection/DissociationDegreeUDF.java
@@ -0,0 +1,88 @@
+package hivemall.ftvec.selection;
+
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.lang.Preconditions;
+import hivemall.utils.math.StatsUtils;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+import javax.annotation.Nonnull;
+
+@Description(name = "",
+value = "_FUNC_(array expected, array observed) - 
Returns dissociation degree as double")
+public abstract class DissociationDegreeUDF extends GenericUDF {
+private ListObjectInspector expectedOI;
+private DoubleObjectInspector expectedElO

[47/50] [abbrv] incubator-hivemall git commit: Fix syntax errors in spark (#387)

2016-12-01 Thread myui
Fix syntax errors in spark (#387)



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/4c8dcbfc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/4c8dcbfc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/4c8dcbfc

Branch: refs/heads/JIRA-22/pr-385
Commit: 4c8dcbfcdd9dd584fc97e28db39a12d12dfd7b48
Parents: 6549ef5
Author: Takeshi Yamamuro 
Authored: Thu Nov 24 03:13:25 2016 +0900
Committer: Makoto YUI 
Committed: Thu Nov 24 03:13:25 2016 +0900

--
 .../apache/spark/sql/hive/GroupedDataEx.scala   |  8 +--
 .../org/apache/spark/sql/hive/HivemallOps.scala |  6 +--
 .../spark/sql/hive/HivemallOpsSuite.scala   |  7 ++-
 .../spark/sql/hive/HivemallGroupedDataset.scala | 51 ++--
 .../spark/sql/hive/HivemallOpsSuite.scala   | 13 ++---
 5 files changed, 41 insertions(+), 44 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/4c8dcbfc/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
--
diff --git 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
index 8f78a7f..dd6db6c 100644
--- 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
+++ 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
@@ -271,9 +271,11 @@ final class GroupedDataEx protected[sql](
*/
   def onehot_encoding(features: String*): DataFrame = {
 val udaf = HiveUDAFFunction(
-new HiveFunctionWrapper("hivemall.ftvec.trans.OnehotEncodingUDAF"),
-features.map(df.col(_).expr),
-isUDAFBridgeRequired = false)
+  new HiveFunctionWrapper("hivemall.ftvec.trans.OnehotEncodingUDAF"),
+  features.map(df.col(_).expr),
+  isUDAFBridgeRequired = false)
+toDF(Seq(Alias(udaf, udaf.prettyString)()))
+  }
 
   /**
* @see hivemall.ftvec.selection.SignalNoiseRatioUDAF

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/4c8dcbfc/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
--
diff --git 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
index 27cffc7..8583e1c 100644
--- a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
+++ b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
@@ -1010,9 +1010,9 @@ object HivemallOps {
   }
 
   /**
-* @see hivemall.ftvec.selection.ChiSquareUDF
-* @group ftvec.selection
-*/
+   * @see hivemall.ftvec.selection.ChiSquareUDF
+   * @group ftvec.selection
+   */
   def chi2(observed: Column, expected: Column): Column = {
 HiveGenericUDF(new HiveFunctionWrapper(
   "hivemall.ftvec.selection.ChiSquareUDF"), Seq(observed.expr, 
expected.expr))

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/4c8dcbfc/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
--
diff --git 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index c231105..4c77f18 100644
--- 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -22,7 +22,6 @@ import org.apache.spark.sql.{Column, Row}
 import org.apache.spark.sql.hive.HivemallOps._
 import org.apache.spark.sql.hive.HivemallUtils._
 import org.apache.spark.sql.types._
-import org.apache.spark.sql.{Column, Row}
 import org.apache.spark.test.HivemallQueryTest
 import org.apache.spark.test.TestDoubleWrapper._
 import org.apache.spark.test.TestUtils._
@@ -575,14 +574,13 @@ final class HivemallOpsSuite extends HivemallQueryTest {
 assert(row4(0).getDouble(1) ~== 0.25)
   }
 
-  test("user-defined aggregators for ftvec.trans") {
+  ignore("user-defined aggregators for ftvec.trans") {
 import hiveContext.implicits._
 
 val df0 = Seq((1, "cat", "mammal", 9), (1, "dog", "mammal", 10), (1, 
"human", "mammal", 10),
   (1, "seahawk", "bird", 101), (1, "wasp", "insect", 3), (1, "wasp", 
"insect", 9),
   (1, "cat", "mammal", 101), (1, "dog", "mammal", 1), (1, "human", 
"mammal", 9))
-.toDF("col0", "cat1", "cat2", "cat3")
-
+  .toDF("col0", "cat1", "cat2", "cat3")
 val row00 = df0.groupby($"col0").onehot_encoding("cat1")
 val row01 = df0.groupby($"col0").onehot_encodin

[45/50] [abbrv] incubator-hivemall git commit: Updated license headers

2016-12-01 Thread myui
Updated license headers

Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/e44a413e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/e44a413e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/e44a413e

Branch: refs/heads/JIRA-22/pr-385
Commit: e44a413e5fd4270af53895fceec27ccff3d63a73
Parents: 67ba963
Author: myui 
Authored: Mon Nov 21 19:02:27 2016 +0900
Committer: myui 
Committed: Mon Nov 21 19:02:27 2016 +0900

--
 .../hivemall/ftvec/selection/ChiSquareUDF.java  | 77 ++--
 .../ftvec/selection/SignalNoiseRatioUDAF.java   | 39 +-
 .../hivemall/tools/array/SelectKBestUDF.java| 48 ++--
 .../tools/matrix/TransposeAndDotUDAF.java   | 38 +-
 .../ftvec/selection/ChiSquareUDFTest.java   | 35 -
 .../selection/SignalNoiseRatioUDAFTest.java | 36 -
 .../tools/array/SelectKBeatUDFTest.java | 33 +
 .../tools/matrix/TransposeAndDotUDAFTest.java   | 29 
 8 files changed, 171 insertions(+), 164 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e44a413e/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
--
diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
index 1583959..91742bc 100644
--- a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
+++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
@@ -1,20 +1,20 @@
 /*
- * Hivemall: Hive scalable Machine Learning Library
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- * Copyright (C) 2016 Makoto YUI
- * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science 
and Technology (AIST)
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 package hivemall.ftvec.selection;
 
@@ -22,11 +22,18 @@ import hivemall.utils.hadoop.HiveUtils;
 import hivemall.utils.hadoop.WritableUtils;
 import hivemall.utils.lang.Preconditions;
 import hivemall.utils.math.StatsUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -34,15 +41,12 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-
 @Description(name = "chi2",
 value = "_FUNC_(array<array<number>> observed, array<array<number>> 
expected)"
 + " - Returns chi2_val and p_val of each columns as 
<array<double>, array<double>>")
-public class ChiSquareUDF extends GenericUDF {
+@UDFType(deterministic = true, stateful = false)
+public final class ChiSquareUDF extends GenericUDF {
+
 private List

[01/50] [abbrv] incubator-hivemall git commit: add HiveUtils.isNumberListOI

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Branches:
  refs/heads/JIRA-22/pr-285 [created] 05766432c
  refs/heads/JIRA-22/pr-304 [created] 775ae4f79
  refs/heads/JIRA-22/pr-336 [created] f8d152cba
  refs/heads/JIRA-22/pr-356 [created] bb3250448
  refs/heads/JIRA-22/pr-385 [created] 4c8dcbfcd


add HiveUtils.isNumberListOI



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/2dc176a7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/2dc176a7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/2dc176a7

Branch: refs/heads/JIRA-22/pr-385
Commit: 2dc176a760b553214624e98f885a719ee196cc4e
Parents: 5a7df55
Author: amaya 
Authored: Fri Sep 16 15:46:44 2016 +0900
Committer: amaya 
Committed: Fri Sep 16 15:46:44 2016 +0900

--
 core/src/main/java/hivemall/utils/hadoop/HiveUtils.java | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/2dc176a7/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
--
diff --git a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java 
b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
index 4628ce1..32b60d0 100644
--- a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
+++ b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
@@ -189,8 +189,7 @@ public final class HiveUtils {
 return BOOLEAN_TYPE_NAME.equals(typeName);
 }
 
-public static boolean isNumberOI(@Nonnull final ObjectInspector argOI)
-throws UDFArgumentTypeException {
+public static boolean isNumberOI(@Nonnull final ObjectInspector argOI) {
 if (argOI.getCategory() != Category.PRIMITIVE) {
 return false;
 }
@@ -231,6 +230,10 @@ public final class HiveUtils {
 return category == Category.LIST;
 }
 
+public static boolean isNumberListOI(@Nonnull final ObjectInspector oi){
+return isListOI(oi) && 
isNumberOI(((ListObjectInspector)oi).getListElementObjectInspector());
+}
+
 public static boolean isPrimitiveTypeInfo(@Nonnull TypeInfo typeInfo) {
 return typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE;
 }



[18/50] [abbrv] incubator-hivemall git commit: refine transpose_and_dot

2016-12-01 Thread myui
refine transpose_and_dot



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/abbf5492
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/abbf5492
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/abbf5492

Branch: refs/heads/JIRA-22/pr-385
Commit: abbf5492b95dd69e347580c59ac044a78627c547
Parents: a16a3fd
Author: amaya 
Authored: Wed Sep 21 13:11:00 2016 +0900
Committer: amaya 
Committed: Wed Sep 21 13:40:54 2016 +0900

--
 .../tools/matrix/TransposeAndDotUDAF.java   | 32 +++-
 1 file changed, 18 insertions(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/abbf5492/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
--
diff --git a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java 
b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
index 1e54004..9d68f93 100644
--- a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
+++ b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
@@ -127,33 +127,37 @@ public final class TransposeAndDotUDAF extends 
AbstractGenericUDAFResolver {
 
 @Override
 public AbstractAggregationBuffer getNewAggregationBuffer() throws 
HiveException {
-TransposeAndDotAggregationBuffer myAgg = new 
TransposeAndDotAggregationBuffer();
+final TransposeAndDotAggregationBuffer myAgg = new 
TransposeAndDotAggregationBuffer();
 reset(myAgg);
 return myAgg;
 }
 
 @Override
 public void reset(AggregationBuffer agg) throws HiveException {
-TransposeAndDotAggregationBuffer myAgg = 
(TransposeAndDotAggregationBuffer) agg;
+final TransposeAndDotAggregationBuffer myAgg = 
(TransposeAndDotAggregationBuffer) agg;
 myAgg.reset();
 }
 
 @Override
 public void iterate(AggregationBuffer agg, Object[] parameters) throws 
HiveException {
-TransposeAndDotAggregationBuffer myAgg = 
(TransposeAndDotAggregationBuffer) agg;
+final Object matrix0RowObj = parameters[0];
+final Object matrix1RowObj = parameters[1];
 
+Preconditions.checkNotNull(matrix0RowObj);
+Preconditions.checkNotNull(matrix1RowObj);
+
+final TransposeAndDotAggregationBuffer myAgg = 
(TransposeAndDotAggregationBuffer) agg;
+
+// init
 if (matrix0Row == null) {
-matrix0Row = new 
double[matrix0RowOI.getListLength(parameters[0])];
+matrix0Row = new 
double[matrix0RowOI.getListLength(matrix0RowObj)];
 }
 if (matrix1Row == null) {
-matrix1Row = new 
double[matrix1RowOI.getListLength(parameters[1])];
+matrix1Row = new 
double[matrix1RowOI.getListLength(matrix1RowObj)];
 }
 
-HiveUtils.toDoubleArray(parameters[0], matrix0RowOI, matrix0ElOI, 
matrix0Row, false);
-HiveUtils.toDoubleArray(parameters[1], matrix1RowOI, matrix1ElOI, 
matrix1Row, false);
-
-Preconditions.checkNotNull(matrix0Row);
-Preconditions.checkNotNull(matrix1Row);
+HiveUtils.toDoubleArray(matrix0RowObj, matrix0RowOI, matrix0ElOI, 
matrix0Row, false);
+HiveUtils.toDoubleArray(matrix1RowObj, matrix1RowOI, matrix1ElOI, 
matrix1Row, false);
 
 if (myAgg.aggMatrix == null) {
 myAgg.init(matrix0Row.length, matrix1Row.length);
@@ -172,9 +176,9 @@ public final class TransposeAndDotUDAF extends 
AbstractGenericUDAFResolver {
 return;
 }
 
-TransposeAndDotAggregationBuffer myAgg = 
(TransposeAndDotAggregationBuffer) agg;
+final TransposeAndDotAggregationBuffer myAgg = 
(TransposeAndDotAggregationBuffer) agg;
 
-List matrix = aggMatrixOI.getList(other);
+final List matrix = aggMatrixOI.getList(other);
 final int n = matrix.size();
 final double[] row = new 
double[aggMatrixRowOI.getListLength(matrix.get(0))];
 for (int i = 0; i < n; i++) {
@@ -197,9 +201,9 @@ public final class TransposeAndDotUDAF extends 
AbstractGenericUDAFResolver {
 
 @Override
 public Object terminate(AggregationBuffer agg) throws HiveException {
-TransposeAndDotAggregationBuffer myAgg = 
(TransposeAndDotAggregationBuffer) agg;
+final TransposeAndDotAggregationBuffer myAgg = 
(TransposeAndDotAggregationBuffer) agg;
 
-List<List<DoubleWritable>> result = new 
ArrayList<List<DoubleWritable>>();
+final List<List<DoubleWritable>> result = new 
ArrayList<List<DoubleWritable>>();
 for (double[] row : myAgg.aggMatrix) {
 

[17/50] [abbrv] incubator-hivemall git commit: refine chi2

2016-12-01 Thread myui
refine chi2



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/a16a3fde
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/a16a3fde
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/a16a3fde

Branch: refs/heads/JIRA-22/pr-385
Commit: a16a3fde844ba381dee7eb1e9608ddc2dcfb96fc
Parents: 6dc2344
Author: amaya 
Authored: Wed Sep 21 13:10:18 2016 +0900
Committer: amaya 
Committed: Wed Sep 21 13:35:33 2016 +0900

--
 .../hivemall/ftvec/selection/ChiSquareUDF.java  | 40 +++--
 .../java/hivemall/utils/math/StatsUtils.java| 62 +++-
 2 files changed, 58 insertions(+), 44 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/a16a3fde/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
--
diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
index e2b7494..951aeeb 100644
--- a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
+++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
@@ -50,6 +50,12 @@ public class ChiSquareUDF extends GenericUDF {
 private ListObjectInspector expectedRowOI;
 private PrimitiveObjectInspector expectedElOI;
 
+private int nFeatures = -1;
+private double[] observedRow = null; // to reuse
+private double[] expectedRow = null; // to reuse
+private double[][] observed = null; // shape = (#features, #classes)
+private double[][] expected = null; // shape = (#features, #classes)
+
 @Override
 public ObjectInspector initialize(ObjectInspector[] OIs) throws 
UDFArgumentException {
 if (OIs.length != 2) {
@@ -75,12 +81,12 @@ public class ChiSquareUDF extends GenericUDF {
 expectedRowOI = 
HiveUtils.asListOI(expectedOI.getListElementObjectInspector());
 expectedElOI = 
HiveUtils.asDoubleCompatibleOI(expectedRowOI.getListElementObjectInspector());
 
-List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
+final List<ObjectInspector> fieldOIs = new 
ArrayList<ObjectInspector>();
 
fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
 
fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
 
 return ObjectInspectorFactory.getStandardStructObjectInspector(
-Arrays.asList("chi2_vals", "p_vals"), fieldOIs);
+Arrays.asList("chi2", "pvalue"), fieldOIs);
 }
 
 @Override
@@ -93,28 +99,28 @@ public class ChiSquareUDF extends GenericUDF {
 final int nClasses = observedObj.size();
 Preconditions.checkArgument(nClasses == expectedObj.size()); // same 
#rows
 
-int nFeatures = -1;
-double[] observedRow = null; // to reuse
-double[] expectedRow = null; // to reuse
-double[][] observed = null; // shape = (#features, #classes)
-double[][] expected = null; // shape = (#features, #classes)
-
 // explode and transpose matrix
 for (int i = 0; i < nClasses; i++) {
-if (i == 0) {
+final Object observedObjRow = observedObj.get(i);
+final Object expectedObjRow = observedObj.get(i);
+
+Preconditions.checkNotNull(observedObjRow);
+Preconditions.checkNotNull(expectedObjRow);
+
+if (observedRow == null) {
 // init
-observedRow = HiveUtils.asDoubleArray(observedObj.get(i), 
observedRowOI,
-observedElOI, false);
-expectedRow = HiveUtils.asDoubleArray(expectedObj.get(i), 
expectedRowOI,
-expectedElOI, false);
+observedRow = HiveUtils.asDoubleArray(observedObjRow, 
observedRowOI, observedElOI,
+false);
+expectedRow = HiveUtils.asDoubleArray(expectedObjRow, 
expectedRowOI, expectedElOI,
+false);
 nFeatures = observedRow.length;
 observed = new double[nFeatures][nClasses];
 expected = new double[nFeatures][nClasses];
 } else {
-HiveUtils.toDoubleArray(observedObj.get(i), observedRowOI, 
observedElOI,
-observedRow, false);
-HiveUtils.toDoubleArray(expectedObj.get(i), expectedRowOI, 
expectedElOI,
-expectedRow, false);
+HiveUtils.toDoubleArray(observedObjRow, observedRowOI, 
observedElOI, observedRow,
+false);
+HiveUtils.toDoubleArray(expectedObjRow, expectedRowOI, 
expectedElOI, expectedRow,
+false);
   

[12/50] [abbrv] incubator-hivemall git commit: Add optimizer implementations

2016-12-01 Thread myui
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/f81948c5/core/src/main/java/hivemall/optimizer/LossFunctions.java
--
diff --git a/core/src/main/java/hivemall/optimizer/LossFunctions.java 
b/core/src/main/java/hivemall/optimizer/LossFunctions.java
new file mode 100644
index 000..d11be9b
--- /dev/null
+++ b/core/src/main/java/hivemall/optimizer/LossFunctions.java
@@ -0,0 +1,467 @@
+/*
+ * Hivemall: Hive scalable Machine Learning Library
+ *
+ * Copyright (C) 2015 Makoto YUI
+ * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science 
and Technology (AIST)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package hivemall.optimizer;
+
+import hivemall.utils.math.MathUtils;
+
+/**
+ * @link https://github.com/JohnLangford/vowpal_wabbit/wiki/Loss-functions
+ */
+public final class LossFunctions {
+
+public enum LossType {
+SquaredLoss, LogLoss, HingeLoss, SquaredHingeLoss, QuantileLoss, 
EpsilonInsensitiveLoss
+}
+
+public static LossFunction getLossFunction(String type) {
+if ("SquaredLoss".equalsIgnoreCase(type)) {
+return new SquaredLoss();
+} else if ("LogLoss".equalsIgnoreCase(type)) {
+return new LogLoss();
+} else if ("HingeLoss".equalsIgnoreCase(type)) {
+return new HingeLoss();
+} else if ("SquaredHingeLoss".equalsIgnoreCase(type)) {
+return new SquaredHingeLoss();
+} else if ("QuantileLoss".equalsIgnoreCase(type)) {
+return new QuantileLoss();
+} else if ("EpsilonInsensitiveLoss".equalsIgnoreCase(type)) {
+return new EpsilonInsensitiveLoss();
+}
+throw new IllegalArgumentException("Unsupported type: " + type);
+}
+
+public static LossFunction getLossFunction(LossType type) {
+switch (type) {
+case SquaredLoss:
+return new SquaredLoss();
+case LogLoss:
+return new LogLoss();
+case HingeLoss:
+return new HingeLoss();
+case SquaredHingeLoss:
+return new SquaredHingeLoss();
+case QuantileLoss:
+return new QuantileLoss();
+case EpsilonInsensitiveLoss:
+return new EpsilonInsensitiveLoss();
+default:
+throw new IllegalArgumentException("Unsupported type: " + 
type);
+}
+}
+
+public interface LossFunction {
+
+/**
+ * Evaluate the loss function.
+ *
+ * @param p The prediction, p = w^T x
+ * @param y The true value (aka target)
+ * @return The loss evaluated at `p` and `y`.
+ */
+public float loss(float p, float y);
+
+public double loss(double p, double y);
+
+/**
+ * Evaluate the derivative of the loss function with respect to the 
prediction `p`.
+ *
+ * @param p The prediction, p = w^T x
+ * @param y The true value (aka target)
+ * @return The derivative of the loss function w.r.t. `p`.
+ */
+public float dloss(float p, float y);
+
+public boolean forBinaryClassification();
+
+public boolean forRegression();
+
+}
+
+public static abstract class BinaryLoss implements LossFunction {
+
+protected static void checkTarget(float y) {
+if (!(y == 1.f || y == -1.f)) {
+throw new IllegalArgumentException("target must be [+1,-1]: " 
+ y);
+}
+}
+
+protected static void checkTarget(double y) {
+if (!(y == 1.d || y == -1.d)) {
+throw new IllegalArgumentException("target must be [+1,-1]: " 
+ y);
+}
+}
+
+@Override
+public boolean forBinaryClassification() {
+return true;
+}
+
+@Override
+public boolean forRegression() {
+return false;
+}
+}
+
+public static abstract class RegressionLoss implements LossFunction {
+
+@Override
+public boolean forBinaryClassification() {
+return false;
+}
+
+@Override
+public boolean forRegression() {
+return true;
+}
+
+}
+
+/**
+ * Squared loss for regression problems.
+ *
+ * If you're trying to minimize the mean error, use squared-loss.
+ */
+ 

[08/50] [abbrv] incubator-hivemall git commit: add array_top_k_indices

2016-12-01 Thread myui
add array_top_k_indices



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/e9d1a94f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/e9d1a94f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/e9d1a94f

Branch: refs/heads/JIRA-22/pr-385
Commit: e9d1a94f29f31e2910a54add7c2625825d715318
Parents: 7b07e4a
Author: amaya 
Authored: Tue Sep 20 16:55:57 2016 +0900
Committer: amaya 
Committed: Tue Sep 20 18:37:38 2016 +0900

--
 .../tools/array/ArrayTopKIndicesUDF.java| 96 
 1 file changed, 96 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/e9d1a94f/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java
--
diff --git a/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java 
b/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java
new file mode 100644
index 000..bf9fe15
--- /dev/null
+++ b/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java
@@ -0,0 +1,96 @@
+package hivemall.tools.array;
+
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.lang.Preconditions;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.io.IntWritable;
+
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+
+@Description(name = "array_top_k_indices",
+value = "_FUNC_(array<number> array, const int k) - Returns indices 
array of top-k as array<int>")
+public class ArrayTopKIndicesUDF extends GenericUDF {
+private ListObjectInspector arrayOI;
+private PrimitiveObjectInspector elementOI;
+private PrimitiveObjectInspector kOI;
+
+@Override
+public ObjectInspector initialize(ObjectInspector[] OIs) throws 
UDFArgumentException {
+if (OIs.length != 2) {
+throw new UDFArgumentLengthException("Specify two or three 
arguments.");
+}
+
+if (!HiveUtils.isNumberListOI(OIs[0])) {
+throw new UDFArgumentTypeException(0, "Only array type 
argument is acceptable but "
++ OIs[0].getTypeName() + " was passed as `array`");
+}
+if (!HiveUtils.isIntegerOI(OIs[1])) {
+throw new UDFArgumentTypeException(1, "Only int type argument is 
acceptable but "
++ OIs[1].getTypeName() + " was passed as `k`");
+}
+
+arrayOI = HiveUtils.asListOI(OIs[0]);
+elementOI = 
HiveUtils.asDoubleCompatibleOI(arrayOI.getListElementObjectInspector());
+kOI = HiveUtils.asIntegerOI(OIs[1]);
+
+return ObjectInspectorFactory.getStandardListObjectInspector(
+PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+}
+
+@Override
+public Object evaluate(GenericUDF.DeferredObject[] dObj) throws 
HiveException {
+final double[] array = HiveUtils.asDoubleArray(dObj[0].get(), arrayOI, 
elementOI);
+final int k = PrimitiveObjectInspectorUtils.getInt(dObj[1].get(), kOI);
+
+Preconditions.checkNotNull(array);
+Preconditions.checkArgument(array.length >= k);
+
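+List<Map.Entry<Integer, Double>> list = new 
ArrayList<Map.Entry<Integer, Double>>();
+for (int i = 0; i < array.length; i++) {
+list.add(new AbstractMap.SimpleEntry<Integer, Double>(i, 
array[i]));
+}
+list.sort(new Comparator<Map.Entry<Integer, Double>>() {
+@Override
+public int compare(Map.Entry<Integer, Double> o1, 
Map.Entry<Integer, Double> o2) {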
+return o1.getValue() > o2.getValue() ? -1 : 1;
+}
+});
+
+List<IntWritable> result = new ArrayList<IntWritable>();
+for (int i = 0; i < k; i++) {
+result.add(new IntWritable(list.get(i).getKey()));
+}
+return result;
+}
+
+@Override
+public String getDisplayString(String[] children) {
+StringBuilder sb = new StringBuilder();
+sb.append("array_top_k_indices");
+sb.append("(");
+if 

[48/50] [abbrv] incubator-hivemall git commit: Merge branch 'sst-changepoint' of https://github.com/takuti/hivemall into JIRA-22/pr-356

2016-12-01 Thread myui
Merge branch 'sst-changepoint' of https://github.com/takuti/hivemall into 
JIRA-22/pr-356


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/bb325044
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/bb325044
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/bb325044

Branch: refs/heads/JIRA-22/pr-356
Commit: bb32504482db55ed5d946fefb25b4b88d2c36209
Parents: 72d6a62 998203d
Author: myui 
Authored: Fri Dec 2 15:33:01 2016 +0900
Committer: myui 
Committed: Fri Dec 2 15:33:01 2016 +0900

--
 .../anomaly/SingularSpectrumTransform.java  | 193 +++
 .../anomaly/SingularSpectrumTransformUDF.java   | 235 +++
 .../java/hivemall/utils/math/MatrixUtils.java   | 203 
 .../anomaly/SingularSpectrumTransformTest.java  | 146 
 .../hivemall/utils/math/MatrixUtilsTest.java|  67 ++
 5 files changed, 844 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bb325044/core/src/main/java/hivemall/utils/math/MatrixUtils.java
--

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/bb325044/core/src/test/java/hivemall/utils/math/MatrixUtilsTest.java
--



[07/50] [abbrv] incubator-hivemall git commit: change interface of chi2

2016-12-01 Thread myui
change interface of chi2



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/7b07e4a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/7b07e4a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/7b07e4a6

Branch: refs/heads/JIRA-22/pr-385
Commit: 7b07e4a6e1f700ba0a6e5b68659a040a3d89aa2f
Parents: d0e97e6
Author: amaya 
Authored: Tue Sep 20 12:03:44 2016 +0900
Committer: amaya 
Committed: Tue Sep 20 12:11:42 2016 +0900

--
 .../ftvec/selection/ChiSquareTestUDF.java   |  21 
 .../hivemall/ftvec/selection/ChiSquareUDF.java  | 124 +--
 .../ftvec/selection/DissociationDegreeUDF.java  |  88 -
 .../java/hivemall/utils/math/StatsUtils.java|  49 ++--
 4 files changed, 155 insertions(+), 127 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b07e4a6/core/src/main/java/hivemall/ftvec/selection/ChiSquareTestUDF.java
--
diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareTestUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/ChiSquareTestUDF.java
deleted file mode 100644
index d367085..000
--- a/core/src/main/java/hivemall/ftvec/selection/ChiSquareTestUDF.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package hivemall.ftvec.selection;
-
-import hivemall.utils.math.StatsUtils;
-import org.apache.hadoop.hive.ql.exec.Description;
-
-import javax.annotation.Nonnull;
-
-@Description(name = "chi2_test",
-value = "_FUNC_(array expected, array observed) - 
Returns p-value as double")
-public class ChiSquareTestUDF extends DissociationDegreeUDF {
-@Override
-double calcDissociation(@Nonnull final double[] expected,@Nonnull final  
double[] observed) {
-return StatsUtils.chiSquareTest(expected, observed);
-}
-
-@Override
-@Nonnull
-String getFuncName() {
-return "chi2_test";
-}
-}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/7b07e4a6/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
--
diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
index 937b1bd..1954e33 100644
--- a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
+++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
@@ -1,21 +1,131 @@
 package hivemall.ftvec.selection;
 
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.hadoop.WritableUtils;
+import hivemall.utils.lang.Preconditions;
 import hivemall.utils.math.StatsUtils;
 import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 
 import javax.annotation.Nonnull;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
 
 @Description(name = "chi2",
-value = "_FUNC_(array expected, array observed) - 
Returns chi2-value as double")
-public class ChiSquareUDF extends DissociationDegreeUDF {
+value = "_FUNC_(array<array<number>> observed, array<array<number>> 
expected)" +
+" - Returns chi2_val and p_val of each columns as 
<array<double>, array<double>>")
+public class ChiSquareUDF extends GenericUDF {
+private ListObjectInspector observedOI;
+private ListObjectInspector observedRowOI;
+private PrimitiveObjectInspector observedElOI;
+private ListObjectInspector expectedOI;
+private ListObjectInspector expectedRowOI;
+private PrimitiveObjectInspector expectedElOI;
+
 @Override
-double calcDissociation(@Nonnull final double[] expected,@Nonnull final  
double[] observed) {
-return StatsUtils.chiSquare(expected, observed);
+public ObjectInspector initialize(ObjectInspector[] OIs) throws 
UDFArgumentException {
+if (OIs.length != 2) {
+throw new UDFArgument

[13/50] [abbrv] incubator-hivemall git commit: Add optimizer implementations

2016-12-01 Thread myui
Add optimizer implementations


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/f81948c5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/f81948c5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/f81948c5

Branch: refs/heads/JIRA-22/pr-285
Commit: f81948c5c7b83155eb29369a59f1fc65bb607f91
Parents: 5a7df55
Author: Takeshi YAMAMURO 
Authored: Mon May 2 23:43:42 2016 +0900
Committer: Takeshi YAMAMURO 
Committed: Wed Sep 21 00:07:28 2016 +0900

--
 .../src/main/java/hivemall/LearnerBaseUDTF.java |  22 +
 .../hivemall/classifier/AROWClassifierUDTF.java |   2 +-
 .../hivemall/classifier/AdaGradRDAUDTF.java | 123 +
 .../classifier/BinaryOnlineClassifierUDTF.java  |   3 +
 .../classifier/GeneralClassifierUDTF.java   | 121 +
 .../classifier/PassiveAggressiveUDTF.java   |   2 +-
 .../main/java/hivemall/common/EtaEstimator.java | 160 ---
 .../java/hivemall/common/LossFunctions.java | 467 ---
 .../java/hivemall/fm/FMHyperParameters.java |   2 +-
 .../hivemall/fm/FactorizationMachineModel.java  |   2 +-
 .../hivemall/fm/FactorizationMachineUDTF.java   |   8 +-
 .../fm/FieldAwareFactorizationMachineModel.java |   1 +
 .../hivemall/mf/BPRMatrixFactorizationUDTF.java |   2 +-
 .../hivemall/mf/MatrixFactorizationSGDUDTF.java |   2 +-
 .../main/java/hivemall/model/DenseModel.java|  87 +---
 .../main/java/hivemall/model/IWeightValue.java  |  16 +-
 .../java/hivemall/model/PredictionModel.java|   5 +-
 .../model/SpaceEfficientDenseModel.java |  93 +---
 .../main/java/hivemall/model/SparseModel.java   |  20 +-
 .../model/SynchronizedModelWrapper.java |  16 +-
 .../main/java/hivemall/model/WeightValue.java   | 162 ++-
 .../hivemall/model/WeightValueWithClock.java| 167 ++-
 .../optimizer/DenseOptimizerFactory.java| 215 +
 .../java/hivemall/optimizer/EtaEstimator.java   | 191 
 .../java/hivemall/optimizer/LossFunctions.java  | 467 +++
 .../main/java/hivemall/optimizer/Optimizer.java | 246 ++
 .../java/hivemall/optimizer/Regularization.java |  99 
 .../optimizer/SparseOptimizerFactory.java   | 171 +++
 .../hivemall/regression/AROWRegressionUDTF.java |   2 +-
 .../java/hivemall/regression/AdaDeltaUDTF.java  | 117 +
 .../java/hivemall/regression/AdaGradUDTF.java   | 118 +
 .../regression/GeneralRegressionUDTF.java   | 125 +
 .../java/hivemall/regression/LogressUDTF.java   |  63 +--
 .../PassiveAggressiveRegressionUDTF.java|   2 +-
 .../hivemall/regression/RegressionBaseUDTF.java |  14 +-
 .../java/hivemall/optimizer/OptimizerTest.java  | 172 +++
 .../java/hivemall/mix/server/MixServerTest.java |  14 +-
 resources/ddl/define-all-as-permanent.hive  |  13 +-
 resources/ddl/define-all.hive   |  12 +-
 39 files changed, 2301 insertions(+), 1223 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/f81948c5/core/src/main/java/hivemall/LearnerBaseUDTF.java
--
diff --git a/core/src/main/java/hivemall/LearnerBaseUDTF.java 
b/core/src/main/java/hivemall/LearnerBaseUDTF.java
index 4518cce..7fd5190 100644
--- a/core/src/main/java/hivemall/LearnerBaseUDTF.java
+++ b/core/src/main/java/hivemall/LearnerBaseUDTF.java
@@ -28,6 +28,9 @@ import hivemall.model.SparseModel;
 import hivemall.model.SynchronizedModelWrapper;
 import hivemall.model.WeightValue;
 import hivemall.model.WeightValue.WeightValueWithCovar;
+import hivemall.optimizer.DenseOptimizerFactory;
+import hivemall.optimizer.Optimizer;
+import hivemall.optimizer.SparseOptimizerFactory;
 import hivemall.utils.datetime.StopWatch;
 import hivemall.utils.hadoop.HadoopUtils;
 import hivemall.utils.hadoop.HiveUtils;
@@ -38,6 +41,7 @@ import java.io.BufferedReader;
 import java.io.File;
 import java.io.IOException;
 import java.util.List;
+import java.util.Map;
 
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -195,6 +199,24 @@ public abstract class LearnerBaseUDTF extends 
UDTFWithOptions {
 return model;
 }
 
+// If a model implements an optimizer, it must override this
+protected Map getOptimzierOptions() {
+return null;
+}
+
+protected Optimizer createOptimizer() {
+assert(!useCovariance());
+final Map options = getOptimzierOptions();
+if(options != null) {
+if (dense_model) {
+return DenseOptimizerFactory.create(model_dims, options);
+} else {
+return SparseOptimizerFactory.create(model_dims, options);
+}
+}
+return null;
+}
+
 protected MixClient configureMixClient(Stri

[46/50] [abbrv] incubator-hivemall git commit: Add feature selection gitbook (#386)

2016-12-01 Thread myui
Add feature selection gitbook (#386)



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/6549ef51
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/6549ef51
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/6549ef51

Branch: refs/heads/JIRA-22/pr-385
Commit: 6549ef5104883a9529dfd9fc52b2b24843076fbb
Parents: e44a413
Author: amaya 
Authored: Wed Nov 23 21:16:10 2016 +0900
Committer: Makoto YUI 
Committed: Wed Nov 23 21:16:10 2016 +0900

--
 docs/gitbook/SUMMARY.md |   2 +
 .../gitbook/ft_engineering/feature_selection.md | 151 +++
 2 files changed, 153 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/6549ef51/docs/gitbook/SUMMARY.md
--
diff --git a/docs/gitbook/SUMMARY.md b/docs/gitbook/SUMMARY.md
index c333c98..33bb46c 100644
--- a/docs/gitbook/SUMMARY.md
+++ b/docs/gitbook/SUMMARY.md
@@ -61,6 +61,8 @@
 * [Vectorize Features](ft_engineering/vectorizer.md)
 * [Quantify non-number features](ft_engineering/quantify.md)
 
+* [Feature selection](ft_engineering/feature_selection.md)
+
 ## Part IV - Evaluation
 
 * [Statistical evaluation of a prediction model](eval/stat_eval.md)

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/6549ef51/docs/gitbook/ft_engineering/feature_selection.md
--
diff --git a/docs/gitbook/ft_engineering/feature_selection.md 
b/docs/gitbook/ft_engineering/feature_selection.md
new file mode 100644
index 000..8b522c6
--- /dev/null
+++ b/docs/gitbook/ft_engineering/feature_selection.md
@@ -0,0 +1,151 @@
+
+
+Feature selection is the process of selecting a subset of influential 
features from a larger set of candidate features.
+It is an important technique to **enhance results**, **shorten training time** 
and **make features human-understandable**.
+
+## Selecting methods supported by Hivemall
+* Chi-square (Chi2)
+* For non-negative data only
+* Signal Noise Ratio (SNR)
+* ~~Minimum Redundancy Maximum Relevance (mRMR)~~
+* Contributions are welcome!
+
+## Usage
+1. Create importance list for feature selection
+* chi2/SNR
+2. Filter features
+* Select top-k features based on importance list
+
+
+## Example - Chi2
+``` sql
+CREATE TABLE input (
+  X array, -- features
+  Y array -- binarized label
+);
+
+WITH stats AS (
+  SELECT
+-- [UDAF] transpose_and_dot(Y::array, 
X::array)::array>
+transpose_and_dot(Y, X) AS observed, -- array>, shape = 
(n_classes, n_features)
+array_sum(X) AS feature_count, -- n_features col vector, shape = (1, 
array)
+array_avg(Y) AS class_prob -- n_class col vector, shape = (1, 
array)
+  FROM
+input
+),
+test AS (
+  SELECT
+transpose_and_dot(class_prob, feature_count) AS expected -- 
array>, shape = (n_class, n_features)
+  FROM
+stats
+),
+chi2 AS (
+  SELECT
+-- [UDAF] chi2(observed::array>, 
expected::array>)::struct, array>
+chi2(observed, expected) AS chi2s -- struct, array>, 
each shape = (1, n_features)
+  FROM
+test JOIN stats
+)
+SELECT
+  -- [UDF] select_k_best(X::array, importance_list::array, 
k::int)::array
+  select_k_best(X, chi2s.chi2, ${k}) -- top-k feature selection based on chi2 
score
+FROM
+  input JOIN chi2;
+```
+
+
+## Example - SNR
+``` sql
+CREATE TABLE input (
+  X array, -- features
+  Y array -- binarized label
+);
+
+WITH snr AS (
+  -- [UDAF] snr(features::array, labels::array)::array
+  SELECT snr(X, Y) AS snr FROM input -- aggregated SNR as array, shape 
= (1, #features)
+)
+SELECT select_k_best(X, snr, ${k}) FROM input JOIN snr;
+```
+
+
+## UDF details
+### Common
+ [UDAF] `transpose_and_dot(X::array, 
Y::array)::array>`
+# Input
+
+| array X | array Y |
+| :-: | :-: |
+| a row of matrix | a row of matrix |
+# Output
+
+| array> dotted |
+| :-: |
+| `dot(X.T, Y)`, shape = (X.#cols, Y.#cols) |
+ [UDF] `select_k_best(X::array, importance_list::array, 
k::int)::array`
+# Input
+
+| array X | array importance list | int k |
+| :-: | :-: | :-: |
+| array | the larger, the more important | top-? |
+# Output
+
+| array> k-best elements |
+| :-: |
+| top-k elements from X based on indices of importance list |
+
+ Note
+- The current implementation expects **_`importance_list` and `k` to be 
identical for every row_**, which may be confusing.
+  - Future workaround: add an option indicating that a common `importance_list` and `k` are used
+
+
+### Chi2
+ [UDF] `chi2(observed::array>, 
expected::array>)::struct, array>`
+# Input
+
+both `observed` and `expected`, shape = (#classes, #features)
+
+| array observed | array expected |
+| :-: | :-: |
+| observed features | expected features, `dot(class_

[11/50] [abbrv] incubator-hivemall git commit: add ddl definitions

2016-12-01 Thread myui
add ddl definitions



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/be1ea37a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/be1ea37a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/be1ea37a

Branch: refs/heads/JIRA-22/pr-385
Commit: be1ea37a0f5048cde4284107c04e109f0f526b42
Parents: ad81b3a
Author: amaya 
Authored: Tue Sep 20 18:00:49 2016 +0900
Committer: amaya 
Committed: Tue Sep 20 18:38:01 2016 +0900

--
 resources/ddl/define-all-as-permanent.hive | 20 
 resources/ddl/define-all.hive  | 20 
 resources/ddl/define-all.spark | 20 
 resources/ddl/define-udfs.td.hql   |  4 
 4 files changed, 64 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/be1ea37a/resources/ddl/define-all-as-permanent.hive
--
diff --git a/resources/ddl/define-all-as-permanent.hive 
b/resources/ddl/define-all-as-permanent.hive
index bab5a29..52b73a0 100644
--- a/resources/ddl/define-all-as-permanent.hive
+++ b/resources/ddl/define-all-as-permanent.hive
@@ -202,6 +202,13 @@ CREATE FUNCTION zscore as 
'hivemall.ftvec.scaling.ZScoreUDF' USING JAR '${hivema
 DROP FUNCTION IF EXISTS l2_normalize;
 CREATE FUNCTION l2_normalize as 'hivemall.ftvec.scaling.L2NormalizationUDF' 
USING JAR '${hivemall_jar}';
 
+-
+-- selection functions --
+-
+
+DROP FUNCTION IF EXISTS chi_square;
+CREATE FUNCTION chi_square as 'hivemall.ftvec.selection.ChiSquareUDF' USING 
JAR '${hivemall_jar}';
+
 
 -- misc functions --
 
@@ -364,6 +371,9 @@ CREATE FUNCTION subarray_endwith as 
'hivemall.tools.array.SubarrayEndWithUDF' US
 DROP FUNCTION IF EXISTS subarray_startwith;
 CREATE FUNCTION subarray_startwith as 
'hivemall.tools.array.SubarrayStartWithUDF' USING JAR '${hivemall_jar}';
 
+DROP FUNCTION IF EXISTS subarray_by_indices;
+CREATE FUNCTION subarray_by_indices as 
'hivemall.tools.array.SubarrayByIndicesUDF' USING JAR '${hivemall_jar}';
+
 DROP FUNCTION IF EXISTS array_concat;
 CREATE FUNCTION array_concat as 'hivemall.tools.array.ArrayConcatUDF' USING 
JAR '${hivemall_jar}';
 
@@ -380,6 +390,9 @@ CREATE FUNCTION array_avg as 
'hivemall.tools.array.ArrayAvgGenericUDAF' USING JA
 DROP FUNCTION IF EXISTS array_sum;
 CREATE FUNCTION array_sum as 'hivemall.tools.array.ArraySumUDAF' USING JAR 
'${hivemall_jar}';
 
+DROP FUNCTION array_top_k_indices;
+CREATE FUNCTION array_top_k_indices as 
'hivemall.tools.array.ArrayTopKIndicesUDF' USING JAR '${hivemall_jar}';
+
 DROP FUNCTION IF EXISTS to_string_array;
 CREATE FUNCTION to_string_array as 'hivemall.tools.array.ToStringArrayUDF' 
USING JAR '${hivemall_jar}';
 
@@ -436,6 +449,13 @@ DROP FUNCTION IF EXISTS sigmoid;
 CREATE FUNCTION sigmoid as 'hivemall.tools.math.SigmoidGenericUDF' USING JAR 
'${hivemall_jar}';
 
 --
+-- Matrix functions --
+--
+
+DROP FUNCTION IF EXISTS transpose_and_dot;
+CREATE FUNCTION transpose_and_dot as 
'hivemall.tools.matrix.TransposeAndDotUDAF' USING JAR '${hivemall_jar}';
+
+--
 -- mapred functions --
 --
 

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/be1ea37a/resources/ddl/define-all.hive
--
diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive
index 315b4d2..a70ae0f 100644
--- a/resources/ddl/define-all.hive
+++ b/resources/ddl/define-all.hive
@@ -198,6 +198,13 @@ create temporary function zscore as 
'hivemall.ftvec.scaling.ZScoreUDF';
 drop temporary function l2_normalize;
 create temporary function l2_normalize as 
'hivemall.ftvec.scaling.L2NormalizationUDF';
 
+-
+-- selection functions --
+-
+
+drop temporary function chi_square;
+create temporary function chi_square as 
'hivemall.ftvec.selection.ChiSquareUDF';
+
 ---
 -- Feature engineering functions --
 ---
@@ -360,6 +367,9 @@ create temporary function subarray_endwith as 
'hivemall.tools.array.SubarrayEndW
 drop temporary function subarray_startwith;
 create temporary function subarray_startwith as 
'hivemall.tools.array.SubarrayStartWithUDF';
 
+drop temporary function subarray_by_indices;
+create temporary function subarray_by_indices as 
'hivemall.tools.array.SubarrayByIndicesUDF';
+
 drop temporary function array_concat;
 create temporary function array_concat as 
'hivemall.tools.array.ArrayConcatUDF';
 
@@ -376,6 +386,9 @@ create temporary function array_avg as 
'hivemall.tools.array.

[44/50] [abbrv] incubator-hivemall git commit: Merge branch 'feature/feature_selection' of https://github.com/amaya382/hivemall into feature_selection

2016-12-01 Thread myui
Merge branch 'feature/feature_selection' of
https://github.com/amaya382/hivemall into feature_selection

# Conflicts:
#   core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
#   core/src/main/java/hivemall/utils/math/StatsUtils.java
#   
spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
#   
spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
#   
spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
#   
spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/67ba9631
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/67ba9631
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/67ba9631

Branch: refs/heads/JIRA-22/pr-385
Commit: 67ba9631af3c231b7abd145134d17237b6aca0a5
Parents: 69496fa ce4a489
Author: myui 
Authored: Mon Nov 21 18:19:45 2016 +0900
Committer: myui 
Committed: Mon Nov 21 18:19:45 2016 +0900

--
 .../hivemall/ftvec/selection/ChiSquareUDF.java  | 155 
 .../ftvec/selection/SignalNoiseRatioUDAF.java   | 349 +++
 .../hivemall/tools/array/SelectKBestUDF.java| 143 
 .../tools/matrix/TransposeAndDotUDAF.java   | 213 +++
 .../java/hivemall/utils/hadoop/HiveUtils.java   |  22 +-
 .../java/hivemall/utils/math/StatsUtils.java|  91 +
 .../ftvec/selection/ChiSquareUDFTest.java   |  80 +
 .../selection/SignalNoiseRatioUDAFTest.java | 348 ++
 .../tools/array/SelectKBeatUDFTest.java |  65 
 .../tools/matrix/TransposeAndDotUDAFTest.java   |  58 +++
 resources/ddl/define-all-as-permanent.hive  |  20 ++
 resources/ddl/define-all.hive   |  20 ++
 resources/ddl/define-all.spark  |  20 ++
 resources/ddl/define-udfs.td.hql|   4 +
 .../apache/spark/sql/hive/GroupedDataEx.scala   |  21 ++
 .../org/apache/spark/sql/hive/HivemallOps.scala |  18 +
 .../spark/sql/hive/HivemallOpsSuite.scala   | 100 ++
 .../spark/sql/hive/HivemallGroupedDataset.scala |  25 ++
 .../org/apache/spark/sql/hive/HivemallOps.scala |  20 ++
 .../spark/sql/hive/HivemallOpsSuite.scala   | 103 ++
 20 files changed, 1873 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/67ba9631/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
--
diff --cc core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
index d8b1aef,c752188..8188b7a
--- a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
+++ b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
@@@ -242,10 -240,16 +242,20 @@@ public final class HiveUtils 
  return category == Category.LIST;
  }
  
 +public static boolean isMapOI(@Nonnull final ObjectInspector oi) {
 +return oi.getCategory() == Category.MAP;
 +}
 +
+ public static boolean isNumberListOI(@Nonnull final ObjectInspector oi) {
+ return isListOI(oi)
+ && isNumberOI(((ListObjectInspector) 
oi).getListElementObjectInspector());
+ }
+ 
+ public static boolean isNumberListListOI(@Nonnull final ObjectInspector 
oi) {
+ return isListOI(oi)
+ && isNumberListOI(((ListObjectInspector) 
oi).getListElementObjectInspector());
+ }
+ 
  public static boolean isPrimitiveTypeInfo(@Nonnull TypeInfo typeInfo) {
  return typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE;
  }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/67ba9631/core/src/main/java/hivemall/utils/math/StatsUtils.java
--

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/67ba9631/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
--
diff --cc 
spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
index fd4da64,2482c62..8f78a7f
--- 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
+++ 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
@@@ -267,13 -266,25 +267,34 @@@ final class GroupedDataEx protected[sql
}
  
/**
 +   * @see hivemall.ftvec.trans.OnehotEncodingUDAF
 +   */
 +  def onehot_encoding(features: String*): DataFrame = {
 +val udaf = HiveUDAFFunction(
 +new HiveFunctionWrapper("hivemall.ftvec.trans.OnehotEncodingUDAF"),
 +features.map(df.col(_).expr),
 +isUDAFBridgeRequired = false)
++
++  /**
+* @see hivemall.ftvec.selection.

[19/50] [abbrv] incubator-hivemall git commit: fix chi2

2016-12-01 Thread myui
fix chi2



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/b8cf3968
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/b8cf3968
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/b8cf3968

Branch: refs/heads/JIRA-22/pr-385
Commit: b8cf39684496f2511e59294041d443b9438394a9
Parents: abbf549
Author: amaya 
Authored: Wed Sep 21 15:02:12 2016 +0900
Committer: amaya 
Committed: Wed Sep 21 16:23:42 2016 +0900

--
 core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b8cf3968/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
--
diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java 
b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
index 951aeeb..70f0316 100644
--- a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
+++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java
@@ -102,7 +102,7 @@ public class ChiSquareUDF extends GenericUDF {
 // explode and transpose matrix
 for (int i = 0; i < nClasses; i++) {
 final Object observedObjRow = observedObj.get(i);
-final Object expectedObjRow = observedObj.get(i);
+final Object expectedObjRow = expectedObj.get(i);
 
 Preconditions.checkNotNull(observedObjRow);
 Preconditions.checkNotNull(expectedObjRow);



[06/50] [abbrv] incubator-hivemall git commit: add HiveUtils.isNumberListListOI

2016-12-01 Thread myui
add HiveUtils.isNumberListListOI



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/d0e97e6f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/d0e97e6f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/d0e97e6f

Branch: refs/heads/JIRA-22/pr-385
Commit: d0e97e6ff71b2072ec5235cc3ac169162d59da59
Parents: d8f1005
Author: amaya 
Authored: Tue Sep 20 12:02:28 2016 +0900
Committer: amaya 
Committed: Tue Sep 20 12:02:28 2016 +0900

--
 core/src/main/java/hivemall/utils/hadoop/HiveUtils.java | 4 
 1 file changed, 4 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/d0e97e6f/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
--
diff --git a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java 
b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
index 7e8ea7b..dcbf534 100644
--- a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
+++ b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
@@ -235,6 +235,10 @@ public final class HiveUtils {
 return isListOI(oi) && 
isNumberOI(((ListObjectInspector)oi).getListElementObjectInspector());
 }
 
+public static boolean isNumberListListOI(@Nonnull final ObjectInspector 
oi) {
+return isListOI(oi) && 
isNumberListOI(((ListObjectInspector)oi).getListElementObjectInspector());
+}
+
 public static boolean isPrimitiveTypeInfo(@Nonnull TypeInfo typeInfo) {
 return typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE;
 }



[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Tags:  refs/tags/v0.4.2-rc.2 [created] e1df0504d


[05/50] [abbrv] incubator-hivemall git commit: mod number format

2016-12-01 Thread myui
mod number format



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/d8f1005b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/d8f1005b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/d8f1005b

Branch: refs/heads/JIRA-22/pr-385
Commit: d8f1005bb9fbf769b117290582bed18d7607a94a
Parents: d3009be
Author: amaya 
Authored: Tue Sep 20 12:01:46 2016 +0900
Committer: amaya 
Committed: Tue Sep 20 12:01:46 2016 +0900

--
 .../hivemall/tools/matrix/TransposeAndDotUDAF.java|  2 +-
 .../src/main/java/hivemall/utils/math/StatsUtils.java | 14 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/d8f1005b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
--
diff --git a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java 
b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
index 4fa5ce4..3dcbb93 100644
--- a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
+++ b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java
@@ -81,7 +81,7 @@ public final class TransposeAndDotUDAF extends 
AbstractGenericUDAFResolver {
 public void reset() {
 if (aggMatrix != null) {
 for (double[] row : aggMatrix) {
-Arrays.fill(row, 0.0);
+Arrays.fill(row, 0.d);
 }
 }
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/d8f1005b/core/src/main/java/hivemall/utils/math/StatsUtils.java
--
diff --git a/core/src/main/java/hivemall/utils/math/StatsUtils.java 
b/core/src/main/java/hivemall/utils/math/StatsUtils.java
index ffccea3..7633419 100644
--- a/core/src/main/java/hivemall/utils/math/StatsUtils.java
+++ b/core/src/main/java/hivemall/utils/math/StatsUtils.java
@@ -198,22 +198,22 @@ public final class StatsUtils {
 public static double chiSquare(@Nonnull final double[] expected, @Nonnull 
final double[] observed) {
 Preconditions.checkArgument(expected.length == observed.length);
 
-double sumExpected = 0.0D;
-double sumObserved = 0.0D;
+double sumExpected = 0.d;
+double sumObserved = 0.d;
 
 for (int ratio = 0; ratio < observed.length; ++ratio) {
 sumExpected += expected[ratio];
 sumObserved += observed[ratio];
 }
 
-double var15 = 1.0D;
+double var15 = 1.d;
 boolean rescale = false;
-if (Math.abs(sumExpected - sumObserved) > 1.0E-5D) {
+if (Math.abs(sumExpected - sumObserved) > 1.e-5) {
 var15 = sumObserved / sumExpected;
 rescale = true;
 }
 
-double sumSq = 0.0D;
+double sumSq = 0.d;
 
 for (int i = 0; i < observed.length; ++i) {
 double dev;
@@ -235,7 +235,7 @@ public final class StatsUtils {
  * @return p-value
  */
 public static double chiSquareTest(@Nonnull final double[] 
expected,@Nonnull final double[] observed) {
-ChiSquaredDistribution distribution = new ChiSquaredDistribution(null, 
(double)expected.length - 1.0D);
-return 1.0D - distribution.cumulativeProbability(chiSquare(expected, 
observed));
+ChiSquaredDistribution distribution = new ChiSquaredDistribution(null, 
(double)expected.length - 1.d);
+return 1.d - distribution.cumulativeProbability(chiSquare(expected, 
observed));
 }
 }



[02/50] [abbrv] incubator-hivemall git commit: add HiveUtils.asDoubleOI

2016-12-01 Thread myui
add HiveUtils.asDoubleOI



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/56adf2d4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/56adf2d4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/56adf2d4

Branch: refs/heads/JIRA-22/pr-385
Commit: 56adf2d4e8b2591c31b846b8980016d3dafdbacc
Parents: 2dc176a
Author: amaya 
Authored: Fri Sep 16 15:48:33 2016 +0900
Committer: amaya 
Committed: Fri Sep 16 15:48:33 2016 +0900

--
 core/src/main/java/hivemall/utils/hadoop/HiveUtils.java | 9 +
 1 file changed, 9 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/56adf2d4/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
--
diff --git a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java 
b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
index 32b60d0..7e8ea7b 100644
--- a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
+++ b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java
@@ -57,6 +57,7 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
@@ -675,6 +676,14 @@ public final class HiveUtils {
 return (LongObjectInspector) argOI;
 }
 
+public static DoubleObjectInspector asDoubleOI(@Nonnull final 
ObjectInspector argOI)
+throws UDFArgumentException {
+if (!DOUBLE_TYPE_NAME.equals(argOI.getTypeName())) {
+throw new UDFArgumentException("Argument type must be DOUBLE: " + 
argOI.getTypeName());
+}
+return (DoubleObjectInspector) argOI;
+}
+
 public static PrimitiveObjectInspector asIntCompatibleOI(@Nonnull final 
ObjectInspector argOI)
 throws UDFArgumentTypeException {
 if (argOI.getCategory() != Category.PRIMITIVE) {



[15/50] [abbrv] incubator-hivemall git commit: change to select_k_best

2016-12-01 Thread myui
change to select_k_best



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/89c81aac
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/89c81aac
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/89c81aac

Branch: refs/heads/JIRA-22/pr-385
Commit: 89c81aacf5b13f6e125723cb5c70574c10ae
Parents: be1ea37
Author: amaya 
Authored: Wed Sep 21 10:56:59 2016 +0900
Committer: amaya 
Committed: Wed Sep 21 13:35:16 2016 +0900

--
 .../tools/array/ArrayTopKIndicesUDF.java| 115 ---
 .../hivemall/tools/array/SelectKBestUDF.java| 143 +++
 .../tools/array/SubarrayByIndicesUDF.java   | 111 --
 resources/ddl/define-all-as-permanent.hive  |   9 +-
 resources/ddl/define-all.hive   |   9 +-
 resources/ddl/define-all.spark  |   7 +-
 resources/ddl/define-udfs.td.hql|   3 +-
 7 files changed, 152 insertions(+), 245 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/89c81aac/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java
--
diff --git a/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java 
b/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java
deleted file mode 100644
index f895f9b..0000000
--- a/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Hivemall: Hive scalable Machine Learning Library
- *
- * Copyright (C) 2016 Makoto YUI
- * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science 
and Technology (AIST)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package hivemall.tools.array;
-
-import hivemall.utils.hadoop.HiveUtils;
-import hivemall.utils.lang.Preconditions;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.io.IntWritable;
-
-import java.util.AbstractMap;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-
-@Description(
-name = "array_top_k_indices",
-value = "_FUNC_(array array, const int k) - Returns indices 
array of top-k as array")
-public class ArrayTopKIndicesUDF extends GenericUDF {
-private ListObjectInspector arrayOI;
-private PrimitiveObjectInspector elementOI;
-private PrimitiveObjectInspector kOI;
-
-@Override
-public ObjectInspector initialize(ObjectInspector[] OIs) throws 
UDFArgumentException {
-if (OIs.length != 2) {
-throw new UDFArgumentLengthException("Specify two or three 
arguments.");
-}
-
-if (!HiveUtils.isNumberListOI(OIs[0])) {
-throw new UDFArgumentTypeException(0,
-"Only array type argument is acceptable but " + 
OIs[0].getTypeName()
-+ " was passed as `array`");
-}
-if (!HiveUtils.isIntegerOI(OIs[1])) {
-throw new UDFArgumentTypeException(1, "Only int type argument is 
acceptable but "
-+ OIs[1].getTypeName() + " was passed as `k`");
-}
-
-arrayOI = HiveUtils.asListOI(OIs[0]);
-elementOI = 
HiveUtils.asDoubleCompatibleOI(arrayOI.getListElementObjectInspector());
-kOI = HiveUtils.asIntegerOI(OIs[1]);
-
-return 
ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writab

[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Tags:  refs/tags/v0.5-alpha.1 [created] 2a66cf620


[49/50] [abbrv] incubator-hivemall git commit: Merge branch 'feature/systemtest' of https://github.com/amaya382/hivemall into JIRA-22/pr-336

2016-12-01 Thread myui
Merge branch 'feature/systemtest' of https://github.com/amaya382/hivemall into 
JIRA-22/pr-336


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/f8d152cb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/f8d152cb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/f8d152cb

Branch: refs/heads/JIRA-22/pr-336
Commit: f8d152cba17f3f96a897d9eef5bb70722f4bc7c7
Parents: 72d6a62 798ec6a
Author: myui 
Authored: Fri Dec 2 15:33:12 2016 +0900
Committer: myui 
Committed: Fri Dec 2 15:33:12 2016 +0900

--
 pom.xml |   1 +
 systemtest/README.md| 211 +++
 systemtest/pom.xml  | 105 ++
 .../java/com/klarna/hiverunner/Extractor.java   |  33 ++
 .../hivemall/systemtest/MsgpackConverter.java   | 114 ++
 .../exception/QueryExecutionException.java  |  27 ++
 .../systemtest/model/CreateTableHQ.java |  49 +++
 .../hivemall/systemtest/model/DropTableHQ.java  |  27 ++
 .../main/java/hivemall/systemtest/model/HQ.java | 161 
 .../java/hivemall/systemtest/model/HQBase.java  |  22 ++
 .../hivemall/systemtest/model/InsertHQ.java |  47 +++
 .../java/hivemall/systemtest/model/RawHQ.java   |  30 ++
 .../java/hivemall/systemtest/model/TableHQ.java |  30 ++
 .../hivemall/systemtest/model/TableListHQ.java  |  23 ++
 .../model/UploadFileAsNewTableHQ.java   |  35 ++
 .../hivemall/systemtest/model/UploadFileHQ.java |  57 +++
 .../model/UploadFileToExistingHQ.java   |  28 ++
 .../model/lazy/LazyMatchingResource.java|  63 
 .../systemtest/runner/HiveSystemTestRunner.java | 142 
 .../systemtest/runner/SystemTestCommonInfo.java |  46 +++
 .../systemtest/runner/SystemTestRunner.java | 337 +
 .../systemtest/runner/SystemTestTeam.java   | 183 ++
 .../systemtest/runner/TDSystemTestRunner.java   | 363 +++
 .../main/java/hivemall/systemtest/utils/IO.java |  83 +
 .../resources/hivemall/hiverunner.properties|   6 +
 .../src/test/resources/hivemall/td.properties   |  13 +
 26 files changed, 2236 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/f8d152cb/pom.xml
--



[50/50] [abbrv] incubator-hivemall git commit: Merge branch 'AddOptimizers' of https://github.com/maropu/hivemall into JIRA-22/pr-285

2016-12-01 Thread myui
Merge branch 'AddOptimizers' of https://github.com/maropu/hivemall into 
JIRA-22/pr-285


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/05766432
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/05766432
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/05766432

Branch: refs/heads/JIRA-22/pr-285
Commit: 05766432c45f89627e423245e5aec3ced6d0c100
Parents: 775ae4f 3620eb8
Author: myui 
Authored: Fri Dec 2 15:35:05 2016 +0900
Committer: myui 
Committed: Fri Dec 2 15:35:05 2016 +0900

--
 .../src/main/java/hivemall/LearnerBaseUDTF.java |  55 +++
 .../hivemall/classifier/AROWClassifierUDTF.java |   2 +-
 .../hivemall/classifier/AdaGradRDAUDTF.java |   6 +-
 .../classifier/BinaryOnlineClassifierUDTF.java  |  13 +
 .../classifier/GeneralClassifierUDTF.java   | 122 +
 .../classifier/PassiveAggressiveUDTF.java   |   2 +-
 .../main/java/hivemall/common/EtaEstimator.java | 160 ---
 .../java/hivemall/common/LossFunctions.java | 467 ---
 .../java/hivemall/fm/FMHyperParameters.java |   2 +-
 .../hivemall/fm/FactorizationMachineModel.java  |   2 +-
 .../hivemall/fm/FactorizationMachineUDTF.java   |   8 +-
 .../fm/FieldAwareFactorizationMachineModel.java |   1 +
 .../hivemall/mf/BPRMatrixFactorizationUDTF.java |   2 +-
 .../hivemall/mf/MatrixFactorizationSGDUDTF.java |   2 +-
 .../main/java/hivemall/model/DenseModel.java|   5 +
 .../main/java/hivemall/model/IWeightValue.java  |  16 +-
 .../main/java/hivemall/model/NewDenseModel.java | 293 
 .../model/NewSpaceEfficientDenseModel.java  | 317 +
 .../java/hivemall/model/NewSparseModel.java | 197 
 .../java/hivemall/model/PredictionModel.java|   2 +
 .../model/SpaceEfficientDenseModel.java |   5 +
 .../main/java/hivemall/model/SparseModel.java   |   5 +
 .../model/SynchronizedModelWrapper.java |  10 +
 .../main/java/hivemall/model/WeightValue.java   | 162 ++-
 .../hivemall/model/WeightValueWithClock.java| 167 ++-
 .../optimizer/DenseOptimizerFactory.java| 215 +
 .../java/hivemall/optimizer/EtaEstimator.java   | 191 
 .../java/hivemall/optimizer/LossFunctions.java  | 467 +++
 .../main/java/hivemall/optimizer/Optimizer.java | 246 ++
 .../java/hivemall/optimizer/Regularization.java |  99 
 .../optimizer/SparseOptimizerFactory.java   | 171 +++
 .../hivemall/regression/AROWRegressionUDTF.java |   2 +-
 .../java/hivemall/regression/AdaDeltaUDTF.java  |   5 +-
 .../java/hivemall/regression/AdaGradUDTF.java   |   5 +-
 .../regression/GeneralRegressionUDTF.java   | 126 +
 .../java/hivemall/regression/LogressUDTF.java   |  10 +-
 .../PassiveAggressiveRegressionUDTF.java|   2 +-
 .../hivemall/regression/RegressionBaseUDTF.java |  26 +-
 .../NewSpaceEfficientNewDenseModelTest.java |  60 +++
 .../model/SpaceEfficientDenseModelTest.java |  60 ---
 .../java/hivemall/optimizer/OptimizerTest.java  | 172 +++
 .../java/hivemall/mix/server/MixServerTest.java |  18 +-
 resources/ddl/define-all-as-permanent.hive  |  13 +-
 resources/ddl/define-all.hive   |  12 +-
 .../hivemall/mix/server/MixServerSuite.scala|   6 +-
 45 files changed, 3195 insertions(+), 734 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/05766432/core/src/main/java/hivemall/LearnerBaseUDTF.java
--

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/05766432/core/src/main/java/hivemall/classifier/AROWClassifierUDTF.java
--

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/05766432/core/src/main/java/hivemall/classifier/AdaGradRDAUDTF.java
--

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/05766432/core/src/main/java/hivemall/classifier/BinaryOnlineClassifierUDTF.java
--

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/05766432/core/src/main/java/hivemall/classifier/PassiveAggressiveUDTF.java
--

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/05766432/core/src/main/java/hivemall/fm/FMHyperParameters.java
--

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/05766432/core/src/main/java/hivemall/fm/FactorizationMachineModel.java
--

http://git-wip-us.apach

[09/50] [abbrv] incubator-hivemall git commit: add subarray_by_indices

2016-12-01 Thread myui
add subarray_by_indices



Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/1ab9b097
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/1ab9b097
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/1ab9b097

Branch: refs/heads/JIRA-22/pr-385
Commit: 1ab9b0974ca4203c00175469b7b75d5b65209547
Parents: e9d1a94
Author: amaya 
Authored: Tue Sep 20 16:56:15 2016 +0900
Committer: amaya 
Committed: Tue Sep 20 18:37:46 2016 +0900

--
 .../tools/array/SubarrayByIndicesUDF.java   | 93 
 1 file changed, 93 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1ab9b097/core/src/main/java/hivemall/tools/array/SubarrayByIndicesUDF.java
--
diff --git a/core/src/main/java/hivemall/tools/array/SubarrayByIndicesUDF.java 
b/core/src/main/java/hivemall/tools/array/SubarrayByIndicesUDF.java
new file mode 100644
index 0000000..f476589
--- /dev/null
+++ b/core/src/main/java/hivemall/tools/array/SubarrayByIndicesUDF.java
@@ -0,0 +1,93 @@
+package hivemall.tools.array;
+
+
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.lang.Preconditions;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+
+@Description(name = "subarray_by_indices",
+value = "_FUNC_(array input, array indices)" +
+" - Returns subarray selected by given indices as 
array")
+public class SubarrayByIndicesUDF extends GenericUDF {
+private ListObjectInspector inputOI;
+private PrimitiveObjectInspector elementOI;
+private ListObjectInspector indicesOI;
+private PrimitiveObjectInspector indexOI;
+
+@Override
+public ObjectInspector initialize(ObjectInspector[] OIs) throws 
UDFArgumentException {
+if (OIs.length != 2) {
+throw new UDFArgumentLengthException("Specify two arguments.");
+}
+
+if (!HiveUtils.isListOI(OIs[0])) {
+throw new UDFArgumentTypeException(0, "Only array type 
argument is acceptable but "
++ OIs[0].getTypeName() + " was passed as `input`");
+}
+if (!HiveUtils.isListOI(OIs[1])
+|| !HiveUtils.isIntegerOI(((ListObjectInspector) 
OIs[1]).getListElementObjectInspector())) {
+throw new UDFArgumentTypeException(0, "Only array type 
argument is acceptable but "
++ OIs[0].getTypeName() + " was passed as `indices`");
+}
+
+inputOI = HiveUtils.asListOI(OIs[0]);
+elementOI = 
HiveUtils.asDoubleCompatibleOI(inputOI.getListElementObjectInspector());
+indicesOI = HiveUtils.asListOI(OIs[1]);
+indexOI = 
HiveUtils.asIntegerOI(indicesOI.getListElementObjectInspector());
+
+return ObjectInspectorFactory.getStandardListObjectInspector(
+PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+}
+
+@Override
+public Object evaluate(GenericUDF.DeferredObject[] dObj) throws 
HiveException {
+final double[] input = HiveUtils.asDoubleArray(dObj[0].get(), inputOI, 
elementOI);
+final List indices = indicesOI.getList(dObj[1].get());
+
+Preconditions.checkNotNull(input);
+Preconditions.checkNotNull(indices);
+
+List result = new ArrayList();
+for (Object indexObj : indices) {
+int index = PrimitiveObjectInspectorUtils.getInt(indexObj, 
indexOI);
+if (index > input.length - 1) {
+throw new ArrayIndexOutOfBoundsException(index);
+}
+
+result.add(new DoubleWritable(input[index]));
+}
+
+return result;
+}
+
+@Override
+public String getDisplayString(String[] children) {
+StringBuilder sb = new StringBuilder();
+sb.append("subarray_by_indices");
+sb.append("(");
+   

[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Tags:  refs/tags/v0.5-alpha.1 [deleted] 2a66cf620


[incubator-hivemall] Git Push Summary

2016-12-01 Thread myui
Repository: incubator-hivemall
Updated Tags:  refs/tags/v0.4.2-rc.2 [deleted] e1df0504d