Repository: incubator-hivemall
Updated Branches:
  refs/heads/master 518e232d8 -> fad2941fd


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/core/src/test/java/hivemall/utils/lang/PreconditionsTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/utils/lang/PreconditionsTest.java 
b/core/src/test/java/hivemall/utils/lang/PreconditionsTest.java
new file mode 100644
index 0000000..b0cfbd0
--- /dev/null
+++ b/core/src/test/java/hivemall/utils/lang/PreconditionsTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.lang;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Test;
+
+public class PreconditionsTest {
+
+    @Test(expected = UDFArgumentException.class)
+    public void testCheckNotNullTClassOfE() throws UDFArgumentException {
+        Preconditions.checkNotNull(null, UDFArgumentException.class);
+    }
+
+    @Test(expected = HiveException.class)
+    public void testCheckArgumentBooleanClassOfE() throws 
UDFArgumentException, HiveException {
+        Preconditions.checkArgument(false, HiveException.class);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/docs/gitbook/SUMMARY.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/SUMMARY.md b/docs/gitbook/SUMMARY.md
index c333c98..33bb46c 100644
--- a/docs/gitbook/SUMMARY.md
+++ b/docs/gitbook/SUMMARY.md
@@ -61,6 +61,8 @@
     * [Vectorize Features](ft_engineering/vectorizer.md)
     * [Quantify non-number features](ft_engineering/quantify.md)
 
+* [Feature selection](ft_engineering/feature_selection.md)
+
 ## Part IV - Evaluation
 
 * [Statistical evaluation of a prediction model](eval/stat_eval.md)

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/docs/gitbook/ft_engineering/feature_selection.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/ft_engineering/feature_selection.md 
b/docs/gitbook/ft_engineering/feature_selection.md
new file mode 100644
index 0000000..5a2a92b
--- /dev/null
+++ b/docs/gitbook/ft_engineering/feature_selection.md
@@ -0,0 +1,155 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+[Feature Selection](https://en.wikipedia.org/wiki/Feature_selection) is the 
process of selecting a subset of relevant features for use in model 
construction. 
+
+It is a useful technique to 1) improve prediction results by omitting 
redundant features, 2) to shorten training time, and 3) to know important 
features for prediction.
+
+*Note: The feature is supported from Hivemall v0.5-rc.1 or later.*
+
+<!-- toc -->
+
+# Supported Feature Selection algorithms
+
+* Chi-square (Chi2)
+    * In statistics, the $$\chi^2$$ test is applied to test the independence 
of two events. Chi-square statistics between every feature variable and 
the target variable can be applied to Feature Selection. Refer [this 
article](http://nlp.stanford.edu/IR-book/html/htmledition/feature-selectionchi2-feature-selection-1.html)
 for Mathematical details.
+* Signal Noise Ratio (SNR)
+    * The Signal Noise Ratio (SNR) is a univariate feature ranking metric, 
which can be used as a feature selection criterion for binary classification 
problems. SNR is defined as $$|\mu_{1} - \mu_{2}| / (\sigma_{1} + 
\sigma_{2})$$, where $$\mu_{k}$$ is the mean value of the variable in class 
$$k$$, and $$\sigma_{k}$$ is the standard deviation of the variable in class 
$$k$$. Clearly, features with larger SNR are useful for classification.
+
+# Usage
+
+##  Feature Selection based on Chi-square test
+
+``` sql
+CREATE TABLE input (
+  X array<double>, -- features
+  Y array<int> -- binarized label
+);
+ 
+set hivevar:k=2;
+
+WITH stats AS (
+  SELECT
+    transpose_and_dot(Y, X) AS observed, -- array<array<double>>, shape = 
(n_classes, n_features)
+    array_sum(X) AS feature_count, -- n_features col vector, shape = (1, 
array<double>)
+    array_avg(Y) AS class_prob -- n_class col vector, shape = (1, 
array<double>)
+  FROM
+    input
+),
+test AS (
+  SELECT
+    transpose_and_dot(class_prob, feature_count) AS expected -- 
array<array<double>>, shape = (n_class, n_features)
+  FROM
+    stats
+),
+chi2 AS (
+  SELECT
+    chi2(r.observed, l.expected) AS v -- struct<array<double>, array<double>>, 
each shape = (1, n_features)
+  FROM
+    test l
+    CROSS JOIN stats r
+)
+SELECT
+  select_k_best(l.X, r.v.chi2, ${k}) as features -- top-k feature selection 
based on chi2 score
+FROM
+  input l
+  CROSS JOIN chi2 r;
+```
+
+## Feature Selection based on Signal Noise Ratio (SNR)
+
+``` sql
+CREATE TABLE input (
+  X array<double>, -- features
+  Y array<int> -- binarized label
+);
+
+set hivevar:k=2;
+
+WITH snr AS (
+  SELECT snr(X, Y) AS snr -- aggregated SNR as array<double>, shape = (1, 
#features)
+  FROM input
+)
+SELECT 
+  select_k_best(X, snr, ${k}) as features
+FROM
+  input
+  CROSS JOIN snr;
+```
+
+# Function signatures
+
+### [UDAF] `transpose_and_dot(X::array<number>, 
Y::array<number>)::array<array<double>>`
+
+##### Input
+
+| `array<number>` X | `array<number>` Y |
+| :-: | :-: |
+| a row of matrix | a row of matrix |
+
+##### Output
+
+| `array<array<double>>` dot product |
+| :-: |
+| `dot(X.T, Y)` of shape = (X.#cols, Y.#cols) |
+
+### [UDF] `select_k_best(X::array<number>, importance_list::array<number>, 
k::int)::array<double>`
+
+##### Input
+
+| `array<number>` X | `array<number>` importance_list | `int` k |
+| :-: | :-: | :-: |
+| feature vector | importance of each feature | the number of features to be 
selected |
+
+##### Output
+
+| `array<double>` k-best features |
+| :-: |
+| top-k elements from feature vector `X` based on importance list |
+
+### [UDF] `chi2(observed::array<array<number>>, 
expected::array<array<number>>)::struct<array<double>, array<double>>`
+
+##### Input
+
+| `array<array<number>>` observed | `array<array<number>>` expected |
+| :-: | :-: |
+| observed features | expected features `dot(class_prob.T, feature_count)` |
+
+Both of `observed` and `expected` have a shape `(#classes, #features)`
+
+##### Output
+
+| `struct<array<double>, array<double>>` importance_list |
+| :-: |
+| chi2-value and p-value for each feature |
+
+### [UDAF] `snr(X::array<number>, Y::array<int>)::array<double>`
+
+##### Input
+
+| `array<number>` X | `array<int>` Y |
+| :-: | :-: |
+| feature vector | one hot label |
+
+##### Output
+
+| `array<double>` importance_list |
+| :-: |
+| Signal Noise Ratio for each feature |
+

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/docs/gitbook/ft_engineering/quantify.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/ft_engineering/quantify.md 
b/docs/gitbook/ft_engineering/quantify.md
index 952db53..1bfaa73 100644
--- a/docs/gitbook/ft_engineering/quantify.md
+++ b/docs/gitbook/ft_engineering/quantify.md
@@ -19,7 +19,7 @@
         
 `quantified_features` is useful for transforming values of non-number columns 
to indexed numbers.
 
-*Note: The feature is supported Hivemall v0.4 or later.*
+*Note: The feature is supported from Hivemall v0.4 or later.*
 
 ```sql
 desc train;

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/resources/ddl/define-all-as-permanent.hive
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all-as-permanent.hive 
b/resources/ddl/define-all-as-permanent.hive
index 1906de1..72835b1 100644
--- a/resources/ddl/define-all-as-permanent.hive
+++ b/resources/ddl/define-all-as-permanent.hive
@@ -199,6 +199,16 @@ CREATE FUNCTION zscore as 
'hivemall.ftvec.scaling.ZScoreUDF' USING JAR '${hivema
 DROP FUNCTION IF EXISTS l2_normalize;
 CREATE FUNCTION l2_normalize as 'hivemall.ftvec.scaling.L2NormalizationUDF' 
USING JAR '${hivemall_jar}';
 
+---------------------------------
+-- Feature Selection functions --
+---------------------------------
+
+DROP FUNCTION IF EXISTS chi2;
+CREATE FUNCTION chi2 as 'hivemall.ftvec.selection.ChiSquareUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS snr;
+CREATE FUNCTION snr as 'hivemall.ftvec.selection.SignalNoiseRatioUDAF' USING 
JAR '${hivemall_jar}';
+
 --------------------
 -- misc functions --
 --------------------
@@ -386,6 +396,9 @@ CREATE FUNCTION to_string_array as 
'hivemall.tools.array.ToStringArrayUDF' USING
 DROP FUNCTION IF EXISTS array_intersect;
 CREATE FUNCTION array_intersect as 'hivemall.tools.array.ArrayIntersectUDF' 
USING JAR '${hivemall_jar}';
 
+DROP FUNCTION IF EXISTS select_k_best;
+CREATE FUNCTION select_k_best as 'hivemall.tools.array.SelectKBestUDF' USING 
JAR '${hivemall_jar}';
+
 -----------------------------
 -- bit operation functions --
 -----------------------------
@@ -436,6 +449,13 @@ DROP FUNCTION IF EXISTS sigmoid;
 CREATE FUNCTION sigmoid as 'hivemall.tools.math.SigmoidGenericUDF' USING JAR 
'${hivemall_jar}';
 
 ----------------------
+-- Matrix functions --
+----------------------
+
+DROP FUNCTION IF EXISTS transpose_and_dot;
+CREATE FUNCTION transpose_and_dot as 
'hivemall.tools.matrix.TransposeAndDotUDAF' USING JAR '${hivemall_jar}';
+
+----------------------
 -- mapred functions --
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/resources/ddl/define-all.hive
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive
index 37d262a..351303e 100644
--- a/resources/ddl/define-all.hive
+++ b/resources/ddl/define-all.hive
@@ -195,6 +195,16 @@ create temporary function zscore as 
'hivemall.ftvec.scaling.ZScoreUDF';
 drop temporary function l2_normalize;
 create temporary function l2_normalize as 
'hivemall.ftvec.scaling.L2NormalizationUDF';
 
+---------------------------------
+-- Feature Selection functions --
+---------------------------------
+
+drop temporary function chi2;
+create temporary function chi2 as 'hivemall.ftvec.selection.ChiSquareUDF';
+
+drop temporary function snr;
+create temporary function snr as 
'hivemall.ftvec.selection.SignalNoiseRatioUDAF';
+
 -----------------------------------
 -- Feature engineering functions --
 -----------------------------------
@@ -382,6 +392,9 @@ create temporary function to_string_array as 
'hivemall.tools.array.ToStringArray
 drop temporary function array_intersect;
 create temporary function array_intersect as 
'hivemall.tools.array.ArrayIntersectUDF';
 
+drop temporary function select_k_best;
+create temporary function select_k_best as 
'hivemall.tools.array.SelectKBestUDF';
+
 -----------------------------
 -- bit operation functions --
 -----------------------------
@@ -432,6 +445,13 @@ drop temporary function sigmoid;
 create temporary function sigmoid as 'hivemall.tools.math.SigmoidGenericUDF';
 
 ----------------------
+-- Matrix functions --
+----------------------
+
+drop temporary function transpose_and_dot;
+create temporary function transpose_and_dot as 
'hivemall.tools.matrix.TransposeAndDotUDAF';
+
+----------------------
 -- mapred functions --
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/resources/ddl/define-all.spark
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark
index 5de6106..838f7bb 100644
--- a/resources/ddl/define-all.spark
+++ b/resources/ddl/define-all.spark
@@ -6,7 +6,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
hivemall_version")
 sqlContext.sql("CREATE TEMPORARY FUNCTION hivemall_version AS 
'hivemall.HivemallVersionUDF'")
 
 /**
- * binary classification
+ * Binary classification
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS train_perceptron")
@@ -59,7 +59,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
train_multiclass_scw2")
 sqlContext.sql("CREATE TEMPORARY FUNCTION train_multiclass_scw2 AS 
'hivemall.classifier.multiclass.MulticlassSoftConfidenceWeightedUDTF$SCW2'")
 
 /**
- * similarity functions
+ * Similarity functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS cosine_sim")
@@ -78,7 +78,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
distance2similarity")
 sqlContext.sql("CREATE TEMPORARY FUNCTION distance2similarity AS 
'hivemall.knn.similarity.Distance2SimilarityUDF'")
 
 /**
- * distance functions
+ * Distance functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS homming_distance")
@@ -122,7 +122,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
bbit_minhash")
 sqlContext.sql("CREATE TEMPORARY FUNCTION bbit_minhash AS 
'hivemall.knn.lsh.bBitMinHashUDF'")
 
 /**
- * voting functions
+ * Voting functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS voted_avg")
@@ -132,7 +132,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
weight_voted_avg")
 sqlContext.sql("CREATE TEMPORARY FUNCTION weight_voted_avg AS 
'hivemall.ensemble.bagging.WeightVotedAvgUDAF'")
 
 /**
- * misc functions
+ * MISC functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS max_label")
@@ -145,7 +145,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
argmin_kld")
 sqlContext.sql("CREATE TEMPORARY FUNCTION argmin_kld AS 
'hivemall.ensemble.ArgminKLDistanceUDAF'")
 
 /**
- * hashing functions
+ * Feature hashing functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS mhash")
@@ -161,7 +161,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
prefixed_hash_values")
 sqlContext.sql("CREATE TEMPORARY FUNCTION prefixed_hash_values AS 
'hivemall.ftvec.hashing.ArrayPrefixedHashValuesUDF'")
 
 /**
- * pairing functions
+ * Feature pairing functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS polynomial_features")
@@ -171,7 +171,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
powered_features")
 sqlContext.sql("CREATE TEMPORARY FUNCTION powered_features AS 
'hivemall.ftvec.pairing.PoweredFeaturesUDF'")
 
 /**
- * scaling functions
+ * Feature scaling functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS rescale")
@@ -184,7 +184,17 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
normalize")
 sqlContext.sql("CREATE TEMPORARY FUNCTION normalize AS 
'hivemall.ftvec.scaling.L2NormalizationUDF'")
 
 /**
- * misc functions
+ * Feature selection functions
+ */
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS chi2")
+sqlContext.sql("CREATE TEMPORARY FUNCTION chi2 AS 
'hivemall.ftvec.selection.ChiSquareUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS snr")
+sqlContext.sql("CREATE TEMPORARY FUNCTION snr AS 
'hivemall.ftvec.selection.SignalNoiseRatioUDAF'")
+
+/**
+ * MISC feature engineering functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS amplify")
@@ -257,7 +267,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS tf")
 sqlContext.sql("CREATE TEMPORARY FUNCTION tf AS 
'hivemall.ftvec.text.TermFrequencyUDAF'")
 
 /**
- * fegression functions
+ * Regression functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS train_logregr")
@@ -291,7 +301,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
train_arowe2_regr")
 sqlContext.sql("CREATE TEMPORARY FUNCTION train_arow_regr AS 
'hivemall.regression.AROWRegressionUDTF$AROWe2'")
 
 /**
- * array functions
+ * Array functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS float_array")
@@ -321,8 +331,11 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION subarray AS 
'hivemall.tools.array.Suba
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS array_avg")
 sqlContext.sql("CREATE TEMPORARY FUNCTION array_avg AS 
'hivemall.tools.array.ArrayAvgGenericUDAF'")
 
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS select_k_best")
+sqlContext.sql("CREATE TEMPORARY FUNCTION select_k_best AS 
'hivemall.tools.array.SelectKBestUDF'")
+
 /**
- * compression functions
+ * Compression functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS inflate")
@@ -332,7 +345,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS deflate")
 sqlContext.sql("CREATE TEMPORARY FUNCTION deflate AS 
'hivemall.tools.compress.DeflateUDF'")
 
 /**
- * map functions
+ * Map functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS map_get_sum")
@@ -355,14 +368,21 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
sigmoid")
 sqlContext.sql("CREATE TEMPORARY FUNCTION sigmoid AS 
'hivemall.tools.math.SigmoidGenericUDF'")
 
 /**
- * mapred functions
+ * Matrix functions
+ */
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS transpose_and_dot")
+sqlContext.sql("CREATE TEMPORARY FUNCTION transpose_and_dot AS 
'hivemall.tools.matrix.TransposeAndDotUDAF'")
+
+/**
+ * MAPRED functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS rowid")
 sqlContext.sql("CREATE TEMPORARY FUNCTION rowid AS 
'hivemall.tools.mapred.RowIdUDFWrapper'")
 
 /**
- * misc functions
+ * MISC functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS generate_series")

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/resources/ddl/define-udfs.td.hql
----------------------------------------------------------------------
diff --git a/resources/ddl/define-udfs.td.hql b/resources/ddl/define-udfs.td.hql
index a0bea45..47dbd1d 100644
--- a/resources/ddl/define-udfs.td.hql
+++ b/resources/ddl/define-udfs.td.hql
@@ -50,6 +50,8 @@ create temporary function powered_features as 
'hivemall.ftvec.pairing.PoweredFea
 create temporary function rescale as 'hivemall.ftvec.scaling.RescaleUDF';
 create temporary function zscore as 'hivemall.ftvec.scaling.ZScoreUDF';
 create temporary function l2_normalize as 
'hivemall.ftvec.scaling.L2NormalizationUDF';
+create temporary function chi2 as 'hivemall.ftvec.selection.ChiSquareUDF';
+create temporary function snr as 
'hivemall.ftvec.selection.SignalNoiseRatioUDAF';
 create temporary function amplify as 'hivemall.ftvec.amplify.AmplifierUDTF';
 create temporary function rand_amplify as 
'hivemall.ftvec.amplify.RandomAmplifierUDTF';
 create temporary function add_bias as 'hivemall.ftvec.AddBiasUDF';
@@ -101,6 +103,7 @@ create temporary function array_avg as 
'hivemall.tools.array.ArrayAvgGenericUDAF
 create temporary function array_sum as 'hivemall.tools.array.ArraySumUDAF';
 create temporary function to_string_array as 
'hivemall.tools.array.ToStringArrayUDF';
 create temporary function array_intersect as 
'hivemall.tools.array.ArrayIntersectUDF';
+create temporary function select_k_best as 
'hivemall.tools.array.SelectKBestUDF';
 create temporary function bits_collect as 
'hivemall.tools.bits.BitsCollectUDAF';
 create temporary function to_bits as 'hivemall.tools.bits.ToBitsUDF';
 create temporary function unbits as 'hivemall.tools.bits.UnBitsUDF';
@@ -112,6 +115,7 @@ create temporary function map_tail_n as 
'hivemall.tools.map.MapTailNUDF';
 create temporary function to_map as 'hivemall.tools.map.UDAFToMap';
 create temporary function to_ordered_map as 
'hivemall.tools.map.UDAFToOrderedMap';
 create temporary function sigmoid as 'hivemall.tools.math.SigmoidGenericUDF';
+create temporary function transpose_and_dot as 
'hivemall.tools.matrix.TransposeAndDotUDAF';
 create temporary function taskid as 'hivemall.tools.mapred.TaskIdUDF';
 create temporary function jobid as 'hivemall.tools.mapred.JobIdUDF';
 create temporary function rowid as 'hivemall.tools.mapred.RowIdUDF';

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
----------------------------------------------------------------------
diff --git 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
index fd4da64..dd6db6c 100644
--- 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
+++ 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/GroupedDataEx.scala
@@ -271,9 +271,32 @@ final class GroupedDataEx protected[sql](
    */
   def onehot_encoding(features: String*): DataFrame = {
     val udaf = HiveUDAFFunction(
-        new HiveFunctionWrapper("hivemall.ftvec.trans.OnehotEncodingUDAF"),
-        features.map(df.col(_).expr),
-        isUDAFBridgeRequired = false)
+      new HiveFunctionWrapper("hivemall.ftvec.trans.OnehotEncodingUDAF"),
+      features.map(df.col(_).expr),
+      isUDAFBridgeRequired = false)
+    toDF(Seq(Alias(udaf, udaf.prettyString)()))
+  }
+
+  /**
+   * @see hivemall.ftvec.selection.SignalNoiseRatioUDAF
+   */
+  def snr(X: String, Y: String): DataFrame = {
+    val udaf = HiveUDAFFunction(
+      new HiveFunctionWrapper("hivemall.ftvec.selection.SignalNoiseRatioUDAF"),
+      Seq(X, Y).map(df.col(_).expr),
+      isUDAFBridgeRequired = false)
+      .toAggregateExpression()
+    toDF(Seq(Alias(udaf, udaf.prettyString)()))
+  }
+
+  /**
+   * @see hivemall.tools.matrix.TransposeAndDotUDAF
+   */
+  def transpose_and_dot(X: String, Y: String): DataFrame = {
+    val udaf = HiveUDAFFunction(
+      new HiveFunctionWrapper("hivemall.tools.matrix.TransposeAndDotUDAF"),
+      Seq(X, Y).map(df.col(_).expr),
+      isUDAFBridgeRequired = false)
       .toAggregateExpression()
     toDF(Seq(Alias(udaf, udaf.prettyString)()))
   }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
----------------------------------------------------------------------
diff --git 
a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala 
b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
index 4c750dd..8583e1c 100644
--- a/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
+++ b/spark/spark-1.6/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
@@ -1010,6 +1010,15 @@ object HivemallOps {
   }
 
   /**
+   * @see hivemall.ftvec.selection.ChiSquareUDF
+   * @group ftvec.selection
+   */
+  def chi2(observed: Column, expected: Column): Column = {
+    HiveGenericUDF(new HiveFunctionWrapper(
+      "hivemall.ftvec.selection.ChiSquareUDF"), Seq(observed.expr, 
expected.expr))
+  }
+
+  /**
    * @see hivemall.ftvec.conv.ToDenseFeaturesUDF
    * @group ftvec.conv
    */
@@ -1082,6 +1091,15 @@ object HivemallOps {
   }
 
   /**
+   * @see hivemall.tools.array.SelectKBestUDF
+   * @group tools.array
+   */
+  def select_k_best(X: Column, importanceList: Column, k: Column): Column = {
+    HiveGenericUDF(new HiveFunctionWrapper(
+      "hivemall.tools.array.SelectKBestUDF"), Seq(X.expr, importanceList.expr, 
k.expr))
+  }
+
+  /**
    * @see hivemall.tools.math.SigmoidUDF
    * @group misc
    */

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index 901056d..4c77f18 100644
--- 
a/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ 
b/spark/spark-1.6/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -187,6 +187,35 @@ final class HivemallOpsSuite extends HivemallQueryTest {
         Row(Seq("1:1.0"))))
   }
 
+  test("ftvec.selection - chi2") {
+    import hiveContext.implicits._
+
+    // see also hivemall.ftvec.selection.ChiSquareUDFTest
+    val df = Seq(
+      Seq(
+        Seq(250.29999999999998, 170.90000000000003, 73.2, 12.199999999999996),
+        Seq(296.8, 138.50000000000003, 212.99999999999997, 66.3),
+        Seq(329.3999999999999, 148.7, 277.59999999999997, 101.29999999999998)
+      ) -> Seq(
+        Seq(292.1666753739119, 152.70000455081467, 187.93333893418327, 
59.93333511948589),
+        Seq(292.1666753739119, 152.70000455081467, 187.93333893418327, 
59.93333511948589),
+        Seq(292.1666753739119, 152.70000455081467, 187.93333893418327, 
59.93333511948589)))
+      .toDF("arg0", "arg1")
+
+    val result = df.select(chi2(df("arg0"), df("arg1"))).collect
+    assert(result.length == 1)
+    val chi2Val = result.head.getAs[Row](0).getAs[Seq[Double]](0)
+    val pVal = result.head.getAs[Row](0).getAs[Seq[Double]](1)
+
+    (chi2Val, Seq(10.81782088, 3.59449902, 116.16984746, 67.24482759))
+      .zipped
+      .foreach((actual, expected) => assert(actual ~== expected))
+
+    (pVal, Seq(4.47651499e-03, 1.65754167e-01, 5.94344354e-26, 2.50017968e-15))
+      .zipped
+      .foreach((actual, expected) => assert(actual ~== expected))
+  }
+
   test("ftvec.conv - quantify") {
     import hiveContext.implicits._
     val testDf = Seq((1, "aaa", true), (2, "bbb", false), (3, "aaa", 
false)).toDF
@@ -336,6 +365,17 @@ final class HivemallOpsSuite extends HivemallQueryTest {
     checkAnswer(predicted, Seq(Row(0), Row(1)))
   }
 
+  test("tools.array - select_k_best") {
+    import hiveContext.implicits._
+
+    val data = Seq(Seq(0, 1, 3), Seq(2, 4, 1), Seq(5, 4, 9))
+    val df = data.map(d => (d, Seq(3, 1, 2), 2)).toDF("features", 
"importance_list", "k")
+
+    // checkAnswer fails here for some reason (possibly a type mismatch); it works fine on spark-2.0
+    assert(df.select(select_k_best(df("features"), df("importance_list"), 
df("k"))).collect ===
+      data.map(s => Row(Seq(s(0).toDouble, s(2).toDouble))))
+  }
+
   test("misc - sigmoid") {
     import hiveContext.implicits._
     /**
@@ -534,14 +574,13 @@ final class HivemallOpsSuite extends HivemallQueryTest {
     assert(row4(0).getDouble(1) ~== 0.25)
   }
 
-  test("user-defined aggregators for ftvec.trans") {
+  ignore("user-defined aggregators for ftvec.trans") {
     import hiveContext.implicits._
 
     val df0 = Seq((1, "cat", "mammal", 9), (1, "dog", "mammal", 10), (1, 
"human", "mammal", 10),
       (1, "seahawk", "bird", 101), (1, "wasp", "insect", 3), (1, "wasp", 
"insect", 9),
       (1, "cat", "mammal", 101), (1, "dog", "mammal", 1), (1, "human", 
"mammal", 9))
-    .toDF("col0", "cat1", "cat2", "cat3")
-
+      .toDF("col0", "cat1", "cat2", "cat3")
     val row00 = df0.groupby($"col0").onehot_encoding("cat1")
     val row01 = df0.groupby($"col0").onehot_encoding("cat1", "cat2", "cat3")
 
@@ -560,4 +599,64 @@ final class HivemallOpsSuite extends HivemallQueryTest {
     assert(result012.keySet === Set(1, 3, 9, 10, 101))
     assert(result012.values.toSet === Set(9, 10, 11, 12, 13))
   }
+
+  test("user-defined aggregators for ftvec.selection") {
+    import hiveContext.implicits._
+
+    // see also hivemall.ftvec.selection.SignalNoiseRatioUDAFTest
+    // binary class
+    // +-----------------+-------+
+    // |     features    | class |
+    // +-----------------+-------+
+    // | 5.1,3.5,1.4,0.2 |     0 |
+    // | 4.9,3.0,1.4,0.2 |     0 |
+    // | 4.7,3.2,1.3,0.2 |     0 |
+    // | 7.0,3.2,4.7,1.4 |     1 |
+    // | 6.4,3.2,4.5,1.5 |     1 |
+    // | 6.9,3.1,4.9,1.5 |     1 |
+    // +-----------------+-------+
+    val df0 = Seq(
+      (1, Seq(5.1, 3.5, 1.4, 0.2), Seq(1, 0)), (1, Seq(4.9, 3.0, 1.4, 0.2), 
Seq(1, 0)),
+      (1, Seq(4.7, 3.2, 1.3, 0.2), Seq(1, 0)), (1, Seq(7.0, 3.2, 4.7, 1.4), 
Seq(0, 1)),
+      (1, Seq(6.4, 3.2, 4.5, 1.5), Seq(0, 1)), (1, Seq(6.9, 3.1, 4.9, 1.5), 
Seq(0, 1)))
+      .toDF("c0", "arg0", "arg1")
+    val row0 = df0.groupby($"c0").snr("arg0", "arg1").collect
+    (row0(0).getAs[Seq[Double]](1), Seq(4.38425236, 0.26390002, 15.83984511, 
26.87005769))
+      .zipped
+      .foreach((actual, expected) => assert(actual ~== expected))
+
+    // multiple class
+    // +-----------------+-------+
+    // |     features    | class |
+    // +-----------------+-------+
+    // | 5.1,3.5,1.4,0.2 |     0 |
+    // | 4.9,3.0,1.4,0.2 |     0 |
+    // | 7.0,3.2,4.7,1.4 |     1 |
+    // | 6.4,3.2,4.5,1.5 |     1 |
+    // | 6.3,3.3,6.0,2.5 |     2 |
+    // | 5.8,2.7,5.1,1.9 |     2 |
+    // +-----------------+-------+
+    val df1 = Seq(
+      (1, Seq(5.1, 3.5, 1.4, 0.2), Seq(1, 0, 0)), (1, Seq(4.9, 3.0, 1.4, 0.2), 
Seq(1, 0, 0)),
+      (1, Seq(7.0, 3.2, 4.7, 1.4), Seq(0, 1, 0)), (1, Seq(6.4, 3.2, 4.5, 1.5), 
Seq(0, 1, 0)),
+      (1, Seq(6.3, 3.3, 6.0, 2.5), Seq(0, 0, 1)), (1, Seq(5.8, 2.7, 5.1, 1.9), 
Seq(0, 0, 1)))
+      .toDF("c0", "arg0", "arg1")
+    val row1 = df1.groupby($"c0").snr("arg0", "arg1").collect
+    (row1(0).getAs[Seq[Double]](1), Seq(8.43181818, 1.32121212, 42.94949495, 
33.80952381))
+      .zipped
+      .foreach((actual, expected) => assert(actual ~== expected))
+  }
+
+  test("user-defined aggregators for tools.matrix") {
+    import hiveContext.implicits._
+
+    // | 1  2  3 |T    | 5  6  7 |
+    // | 3  4  5 |  *  | 7  8  9 |
+    val df0 = Seq((1, Seq(1, 2, 3), Seq(5, 6, 7)), (1, Seq(3, 4, 5), Seq(7, 8, 
9)))
+      .toDF("c0", "arg0", "arg1")
+
+    // checkAnswer fails here for some reason (possibly a type mismatch); it works fine on spark-2.0
+    assert(df0.groupby($"c0").transpose_and_dot("arg0", "arg1").collect() ===
+      Seq(Row(1, Seq(Seq(26.0, 30.0, 34.0), Seq(38.0, 44.0, 50.0), Seq(50.0, 
58.0, 66.0)))))
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
----------------------------------------------------------------------
diff --git 
a/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
 
b/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
index 8ac7185..bdeff98 100644
--- 
a/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
+++ 
b/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallGroupedDataset.scala
@@ -133,6 +133,19 @@ final class HivemallGroupedDataset(groupBy: 
RelationalGroupedDataset) {
   }
 
   /**
+   * @see hivemall.tools.matrix.TransposeAndDotUDAF
+   */
+  def transpose_and_dot(X: String, Y: String): DataFrame = {
+    val udaf = HiveUDAFFunction(
+        "transpose_and_dot",
+        new HiveFunctionWrapper("hivemall.tools.matrix.TransposeAndDotUDAF"),
+        Seq(X, Y).map(df.col(_).expr),
+        isUDAFBridgeRequired = false)
+      .toAggregateExpression()
+    toDF(Seq(Alias(udaf, udaf.prettyName)()))
+  }
+
+  /**
    * @see hivemall.ftvec.trans.OnehotEncodingUDAF
    * @group ftvec.trans
    */
@@ -147,6 +160,19 @@ final class HivemallGroupedDataset(groupBy: 
RelationalGroupedDataset) {
   }
 
   /**
   * Aggregates per group the feature vectors in column `X` against the
   * one-hot class-label arrays in column `Y`, producing one signal-to-noise
   * score per feature (see the corresponding suite test for expected values).
   *
   * @see hivemall.ftvec.selection.SignalNoiseRatioUDAF
   * @group ftvec.selection
   */
  def snr(X: String, Y: String): DataFrame = {
    // Wrap the Hive UDAF directly; the bridge is not required because the
    // UDAF implements GenericUDAFResolver.
    val udaf = HiveUDAFFunction(
        "snr",
        new HiveFunctionWrapper("hivemall.ftvec.selection.SignalNoiseRatioUDAF"),
        Seq(X, Y).map(df.col(_).expr),
        isUDAFBridgeRequired = false)
      .toAggregateExpression()
    toDF(Seq(Alias(udaf, udaf.prettyName)()))
  }

  /**
    * @see hivemall.evaluation.MeanAbsoluteErrorUDAF
    * @group evaluation
    */

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
----------------------------------------------------------------------
diff --git 
a/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala 
b/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
index ba58039..9bde84f 100644
--- a/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
+++ b/spark/spark-2.0/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala
@@ -1216,6 +1216,16 @@ object HivemallOps {
   }
 
   /**
   * Computes the chi-square statistics and p-values between the `observed`
   * and `expected` frequency array columns (the result row carries both
   * arrays — see the corresponding suite test).
   *
   * @see hivemall.ftvec.selection.ChiSquareUDF
   * @group ftvec.selection
   */
  def chi2(observed: Column, expected: Column): Column = withExpr {
    HiveGenericUDF("chi2",
      new HiveFunctionWrapper("hivemall.ftvec.selection.ChiSquareUDF"),
      Seq(observed.expr, expected.expr))
  }

  /**
    * @see hivemall.ftvec.conv.ToDenseFeaturesUDF
    * @group ftvec.conv
    */
@@ -1295,6 +1305,16 @@ object HivemallOps {
   }
 
   /**
   * Selects the top-`k` elements of the array column `X`, ranked by the
   * scores in `importanceList` (see the corresponding suite test for an
   * example).
   *
   * @see hivemall.tools.array.SelectKBestUDF
   * @group tools.array
   */
  def select_k_best(X: Column, importanceList: Column, k: Column): Column = withExpr {
    HiveGenericUDF("select_k_best",
      new HiveFunctionWrapper("hivemall.tools.array.SelectKBestUDF"),
      Seq(X.expr, importanceList.expr, k.expr))
  }

  /**
    * @see hivemall.tools.math.SigmoidUDF
    * @group misc
    */

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fad2941f/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
----------------------------------------------------------------------
diff --git 
a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
 
b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
index a093e07..6f2f016 100644
--- 
a/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
+++ 
b/spark/spark-2.0/src/test/scala/org/apache/spark/sql/hive/HivemallOpsSuite.scala
@@ -188,6 +188,35 @@ final class HivemallOpsWithFeatureSuite extends 
HivemallFeatureQueryTest {
         Row(Seq("1:1.0"))))
   }
 
+  test("ftvec.selection - chi2") {
+    import hiveContext.implicits._
+
+    // see also hivemall.ftvec.selection.ChiSquareUDFTest
+    val df = Seq(
+      Seq(
+        Seq(250.29999999999998, 170.90000000000003, 73.2, 12.199999999999996),
+        Seq(296.8, 138.50000000000003, 212.99999999999997, 66.3),
+        Seq(329.3999999999999, 148.7, 277.59999999999997, 101.29999999999998)
+      ) -> Seq(
+        Seq(292.1666753739119, 152.70000455081467, 187.93333893418327, 
59.93333511948589),
+        Seq(292.1666753739119, 152.70000455081467, 187.93333893418327, 
59.93333511948589),
+        Seq(292.1666753739119, 152.70000455081467, 187.93333893418327, 
59.93333511948589)))
+      .toDF("arg0", "arg1")
+
+    val result = df.select(chi2(df("arg0"), df("arg1"))).collect
+    assert(result.length == 1)
+    val chi2Val = result.head.getAs[Row](0).getAs[Seq[Double]](0)
+    val pVal = result.head.getAs[Row](0).getAs[Seq[Double]](1)
+
+    (chi2Val, Seq(10.81782088, 3.59449902, 116.16984746, 67.24482759))
+      .zipped
+      .foreach((actual, expected) => assert(actual ~== expected))
+
+    (pVal, Seq(4.47651499e-03, 1.65754167e-01, 5.94344354e-26, 2.50017968e-15))
+      .zipped
+      .foreach((actual, expected) => assert(actual ~== expected))
+  }
+
   test("ftvec.conv - quantify") {
     import hiveContext.implicits._
     val testDf = Seq((1, "aaa", true), (2, "bbb", false), (3, "aaa", 
false)).toDF
@@ -361,6 +390,18 @@ final class HivemallOpsWithFeatureSuite extends 
HivemallFeatureQueryTest {
     checkAnswer(predicted, Seq(Row(0), Row(1)))
   }
 
+  test("tools.array - select_k_best") {
+    import hiveContext.implicits._
+    import org.apache.spark.sql.functions._
+
+    val data = Seq(Seq(0, 1, 3), Seq(2, 4, 1), Seq(5, 4, 9))
+    val df = data.map(d => (d, Seq(3, 1, 2))).toDF("features", 
"importance_list")
+    val k = 2
+
+    checkAnswer(df.select(select_k_best(df("features"), df("importance_list"), 
lit(k))),
+      data.map(s => Row(Seq(s(0).toDouble, s(2).toDouble))))
+  }
+
   test("misc - sigmoid") {
     import hiveContext.implicits._
     /**
@@ -661,6 +702,65 @@ final class HivemallOpsWithFeatureSuite extends 
HivemallFeatureQueryTest {
     assert(result012.keySet === Set(1, 3, 9, 10, 101))
     assert(result012.values.toSet === Set(9, 10, 11, 12, 13))
   }
+
  test("user-defined aggregators for ftvec.selection") {
    import hiveContext.implicits._

    // Expected values are taken from hivemall.ftvec.selection.SignalNoiseRatioUDAFTest.
    // `arg1` carries one-hot encoded class labels; `snr` yields one score per feature.
    //
    // binary class
    // +-----------------+-------+
    // |     features    | class |
    // +-----------------+-------+
    // | 5.1,3.5,1.4,0.2 |     0 |
    // | 4.9,3.0,1.4,0.2 |     0 |
    // | 4.7,3.2,1.3,0.2 |     0 |
    // | 7.0,3.2,4.7,1.4 |     1 |
    // | 6.4,3.2,4.5,1.5 |     1 |
    // | 6.9,3.1,4.9,1.5 |     1 |
    // +-----------------+-------+
    val df0 = Seq(
      (1, Seq(5.1, 3.5, 1.4, 0.2), Seq(1, 0)), (1, Seq(4.9, 3.0, 1.4, 0.2), Seq(1, 0)),
      (1, Seq(4.7, 3.2, 1.3, 0.2), Seq(1, 0)), (1, Seq(7.0, 3.2, 4.7, 1.4), Seq(0, 1)),
      (1, Seq(6.4, 3.2, 4.5, 1.5), Seq(0, 1)), (1, Seq(6.9, 3.1, 4.9, 1.5), Seq(0, 1)))
      .toDF("c0", "arg0", "arg1")
    val row0 = df0.groupBy($"c0").snr("arg0", "arg1").collect
    // Compare each per-feature score against the expected value with a
    // floating-point tolerance (`~==`).
    (row0(0).getAs[Seq[Double]](1), Seq(4.38425236, 0.26390002, 15.83984511, 26.87005769))
      .zipped
      .foreach((actual, expected) => assert(actual ~== expected))

    // multiple class
    // +-----------------+-------+
    // |     features    | class |
    // +-----------------+-------+
    // | 5.1,3.5,1.4,0.2 |     0 |
    // | 4.9,3.0,1.4,0.2 |     0 |
    // | 7.0,3.2,4.7,1.4 |     1 |
    // | 6.4,3.2,4.5,1.5 |     1 |
    // | 6.3,3.3,6.0,2.5 |     2 |
    // | 5.8,2.7,5.1,1.9 |     2 |
    // +-----------------+-------+
    val df1 = Seq(
      (1, Seq(5.1, 3.5, 1.4, 0.2), Seq(1, 0, 0)), (1, Seq(4.9, 3.0, 1.4, 0.2), Seq(1, 0, 0)),
      (1, Seq(7.0, 3.2, 4.7, 1.4), Seq(0, 1, 0)), (1, Seq(6.4, 3.2, 4.5, 1.5), Seq(0, 1, 0)),
      (1, Seq(6.3, 3.3, 6.0, 2.5), Seq(0, 0, 1)), (1, Seq(5.8, 2.7, 5.1, 1.9), Seq(0, 0, 1)))
      .toDF("c0", "arg0", "arg1")
    val row1 = df1.groupBy($"c0").snr("arg0", "arg1").collect
    (row1(0).getAs[Seq[Double]](1), Seq(8.43181818, 1.32121212, 42.94949495, 33.80952381))
      .zipped
      .foreach((actual, expected) => assert(actual ~== expected))
  }
+
+  test("user-defined aggregators for tools.matrix") {
+    import hiveContext.implicits._
+
+    // | 1  2  3 |T    | 5  6  7 |
+    // | 3  4  5 |  *  | 7  8  9 |
+    val df0 = Seq((1, Seq(1, 2, 3), Seq(5, 6, 7)), (1, Seq(3, 4, 5), Seq(7, 8, 
9)))
+      .toDF("c0", "arg0", "arg1")
+
+    checkAnswer(df0.groupBy($"c0").transpose_and_dot("arg0", "arg1"),
+      Seq(Row(1, Seq(Seq(26.0, 30.0, 34.0), Seq(38.0, 44.0, 50.0), Seq(50.0, 
58.0, 66.0)))))
+  }
 }
 
 final class HivemallOpsWithVectorSuite extends VectorQueryTest {


Reply via email to