[2/4] incubator-hivemall git commit: [HIVEMALL-145] Merge Brickhouse functions

myui Wed, 06 Jun 2018 02:09:52 -0700

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/core/src/test/java/hivemall/tools/datetime/SessionizeUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/datetime/SessionizeUDFTest.java 
b/core/src/test/java/hivemall/tools/datetime/SessionizeUDFTest.java
new file mode 100644
index 0000000..2aca351
--- /dev/null
+++ b/core/src/test/java/hivemall/tools/datetime/SessionizeUDFTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.datetime;
+
+import static hivemall.utils.hadoop.WritableUtils.val;
+
+import hivemall.TestUtils;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class SessionizeUDFTest {
+
+    @Test
+    public void testTwoArgs() {
+        SessionizeUDF udf = new SessionizeUDF();
+
+        Text session1 = new Text(udf.evaluate(val(30L), val(10L)));
+        Assert.assertNotNull(session1);
+
+        Text session2 = new Text(udf.evaluate(val(35L), val(10L)));
+        Assert.assertEquals(session1, session2);
+
+        Text session3 = new Text(udf.evaluate(val(40L), val(10L)));
+        Assert.assertEquals(session2, session3);
+
+        Text session4 = new Text(udf.evaluate(val(50L), val(10L)));
+        Assert.assertNotEquals(session3, session4);
+    }
+
+    @Test
+    public void testThreeArgs() {
+        SessionizeUDF udf = new SessionizeUDF();
+
+        Text session1 = new Text(udf.evaluate(val(30L), val(10L), 
val("subject1")));
+        Assert.assertNotNull(session1);
+
+        Text session2 = new Text(udf.evaluate(val(35L), val(10L), 
val("subject1")));
+        Assert.assertEquals(session1, session2);
+
+        Text session3 = new Text(udf.evaluate(val(40L), val(10L), 
val("subject2")));
+        Assert.assertNotEquals(session2, session3);
+
+        Text session4 = new Text(udf.evaluate(val(45L), val(10L), 
val("subject2")));
+        Assert.assertEquals(session3, session4);
+    }
+
+    @Test
+    public void testSerialization() throws HiveException {
+        SessionizeUDF udf = new SessionizeUDF();
+
+        udf.evaluate(val((long) (System.currentTimeMillis() / 1000.0d)), 
val(30L));
+        udf.evaluate(val((long) (System.currentTimeMillis() / 1000.0d)), 
val(30L));
+
+        byte[] serialized = TestUtils.serializeObjectByKryo(udf);
+        TestUtils.deserializeObjectByKryo(serialized, SessionizeUDF.class);
+    }
+}


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java 
b/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java
index 8bb8db7..738a939 100644
--- a/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java
+++ b/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java
@@ -90,4 +90,5 @@ public class FromJsonUDFTest {
                     HiveUtils.getConstStringObjectInspector("array<double>")},
             new Object[] {"[0.1,1.1,2.2]"});
     }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java 
b/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java
index 39bd64f..f7f698c 100644
--- a/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java
+++ b/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java
@@ -21,6 +21,9 @@ package hivemall.tools.json;
 import hivemall.TestUtils;
 import hivemall.utils.hadoop.WritableUtils;
 
+import java.io.IOException;
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
@@ -31,9 +34,6 @@ import org.apache.hadoop.io.Text;
 import org.junit.Assert;
 import org.junit.Test;
 
-import java.io.IOException;
-import java.util.Arrays;
-
 public class ToJsonUDFTest {
 
     @Test

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/core/src/test/java/hivemall/tools/map/MapKeyValuesUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/map/MapKeyValuesUDFTest.java 
b/core/src/test/java/hivemall/tools/map/MapKeyValuesUDFTest.java
new file mode 100644
index 0000000..2164dc1
--- /dev/null
+++ b/core/src/test/java/hivemall/tools/map/MapKeyValuesUDFTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.map;
+
+import hivemall.TestUtils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class MapKeyValuesUDFTest {
+
+
+    @Test
+    public void testStringDouble() throws HiveException, IOException {
+        MapKeyValuesUDF udf = new MapKeyValuesUDF();
+
+        udf.initialize(new ObjectInspector[] 
{ObjectInspectorFactory.getStandardMapObjectInspector(
+            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+            PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)});
+
+        Map<String, DoubleWritable> input = new HashMap<>();
+        for (int i = 0; i < 10; i++) {
+            input.put("k" + i, new DoubleWritable(i));
+        }
+
+        GenericUDF.DeferredObject[] arguments =
+                new GenericUDF.DeferredObject[] {new 
GenericUDF.DeferredJavaObject(input)};
+
+        List<Object[]> actual = udf.evaluate(arguments);
+
+        Assert.assertEquals(input.size(), actual.size());
+        for (Object[] e : actual) {
+            Assert.assertEquals(2, e.length);
+            Object v = input.get(e[0]);
+            Assert.assertEquals(e[1], v);
+        }
+
+        udf.close();
+    }
+
+    @Test
+    public void testSerialization() throws UDFArgumentException {
+        MapKeyValuesUDF udf = new MapKeyValuesUDF();
+
+        udf.initialize(new ObjectInspector[] 
{ObjectInspectorFactory.getStandardMapObjectInspector(
+            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+            PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)});
+
+        byte[] serialized = TestUtils.serializeObjectByKryo(udf);
+        TestUtils.deserializeObjectByKryo(serialized, MapKeyValuesUDF.class);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/core/src/test/java/hivemall/tools/sanity/RaiseErrorUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/sanity/RaiseErrorUDFTest.java 
b/core/src/test/java/hivemall/tools/sanity/RaiseErrorUDFTest.java
index 004ba26..a96ea57 100644
--- a/core/src/test/java/hivemall/tools/sanity/RaiseErrorUDFTest.java
+++ b/core/src/test/java/hivemall/tools/sanity/RaiseErrorUDFTest.java
@@ -18,15 +18,21 @@
  */
 package hivemall.tools.sanity;
 
+import java.io.IOException;
+
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
 import org.junit.Test;
 
 public class RaiseErrorUDFTest {
 
     @Test(expected = HiveException.class)
-    public void test() throws HiveException {
+    public void test() throws HiveException, IOException {
         RaiseErrorUDF udf = new RaiseErrorUDF();
-        udf.evaluate();
+
+        udf.evaluate(new DeferredObject[] {});
+
+        udf.close();
     }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/core/src/test/java/hivemall/tools/timeseries/MovingAverageUDTFTest.java
----------------------------------------------------------------------
diff --git 
a/core/src/test/java/hivemall/tools/timeseries/MovingAverageUDTFTest.java 
b/core/src/test/java/hivemall/tools/timeseries/MovingAverageUDTFTest.java
new file mode 100644
index 0000000..815b567
--- /dev/null
+++ b/core/src/test/java/hivemall/tools/timeseries/MovingAverageUDTFTest.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.tools.timeseries;
+
+import hivemall.TestUtils;
+import hivemall.tools.timeseries.MovingAverageUDTF;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.Collector;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class MovingAverageUDTFTest {
+
+    @Test
+    public void test() throws HiveException {
+        MovingAverageUDTF udtf = new MovingAverageUDTF();
+
+        ObjectInspector argOI0 = 
PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
+        ObjectInspector argOI1 = 
ObjectInspectorUtils.getConstantObjectInspector(
+            PrimitiveObjectInspectorFactory.javaIntObjectInspector, 3);
+
+        final List<Double> results = new ArrayList<>();
+        udtf.initialize(new ObjectInspector[] {argOI0, argOI1});
+        udtf.setCollector(new Collector() {
+            @Override
+            public void collect(Object input) throws HiveException {
+                Object[] objs = (Object[]) input;
+                Assert.assertEquals(1, objs.length);
+                Assert.assertTrue(objs[0] instanceof DoubleWritable);
+                double x = ((DoubleWritable) objs[0]).get();
+                results.add(x);
+            }
+        });
+
+        udtf.process(new Object[] {1.f, null});
+        udtf.process(new Object[] {2.f, null});
+        udtf.process(new Object[] {3.f, null});
+        udtf.process(new Object[] {4.f, null});
+        udtf.process(new Object[] {5.f, null});
+        udtf.process(new Object[] {6.f, null});
+        udtf.process(new Object[] {7.f, null});
+
+        Assert.assertEquals(Arrays.asList(1.d, 1.5d, 2.d, 3.d, 4.d, 5.d, 6.d), 
results);
+    }
+
+    @Test
+    public void testSerialization() throws HiveException {
+        TestUtils.testGenericUDTFSerialization(MovingAverageUDTF.class,
+            new ObjectInspector[] 
{PrimitiveObjectInspectorFactory.javaFloatObjectInspector,
+                    ObjectInspectorUtils.getConstantObjectInspector(
+                        
PrimitiveObjectInspectorFactory.javaIntObjectInspector, 3)},
+            new Object[][] {{1.f}, {2.f}, {3.f}, {4.f}, {5.f}});
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java 
b/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java
index 0aa90e7..fd70fcb 100644
--- a/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java
+++ b/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java
@@ -94,4 +94,5 @@ public class VectorAddUDFTest {
                         
PrimitiveObjectInspectorFactory.javaFloatObjectInspector)},
             new Object[] {Arrays.asList(1.d, 2.d, 3.d), Arrays.asList(2.f, 
3.f, 4.f)});
     }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java 
b/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java
index b13c447..eb5c08f 100644
--- a/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java
+++ b/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java
@@ -52,8 +52,8 @@ public class VectorDotUDFTest {
                 new GenericUDF.DeferredJavaObject(
                     WritableUtils.toWritableList(new float[] {2, 3, 4}))};
 
-        List<Double> actual = udf.evaluate(args);
-        List<Double> expected = Arrays.asList(2.d, 6.d, 12.d);
+        Object actual = udf.evaluate(args);
+        Double expected = Double.valueOf(1.d * 2.d + 2.d * 3.d + 3.d * 4.d);
 
         Assert.assertEquals(expected, actual);
 
@@ -74,7 +74,7 @@ public class VectorDotUDFTest {
                     WritableUtils.toWritableList(new double[] {1, 2, 3})),
                 new GenericUDF.DeferredJavaObject(WritableUtils.val(2.f))};
 
-        List<Double> actual = udf.evaluate(args);
+        Object actual = udf.evaluate(args);
         List<Double> expected = Arrays.asList(2.d, 4.d, 6.d);
 
         Assert.assertEquals(expected, actual);
@@ -92,4 +92,5 @@ public class VectorDotUDFTest {
                         
PrimitiveObjectInspectorFactory.javaFloatObjectInspector)},
             new Object[] {Arrays.asList(1.d, 2.d, 3.d), Arrays.asList(2.f, 
3.f, 4.f)});
     }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/docs/gitbook/misc/funcs.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/misc/funcs.md b/docs/gitbook/misc/funcs.md
index 00d7bba..3449419 100644
--- a/docs/gitbook/misc/funcs.md
+++ b/docs/gitbook/misc/funcs.md
@@ -393,6 +393,84 @@ This page describes a list of Hivemall functions. See also 
a [list of generic Hi
 
 - `approx_count_distinct(expr x [, const string options])` - Returns an 
approximation of count(DISTINCT x) using HyperLogLogPlus algorithm
 
+- `bloom(string key)` - Constructs a BloomFilter by aggregating a set of keys
+  ```sql
+  CREATE TABLE satisfied_movies AS 
+    SELECT bloom(movieid) as movies
+    FROM (
+      SELECT movieid
+      FROM ratings
+      GROUP BY movieid
+      HAVING avg(rating) >= 4.0
+    ) t;
+  ```
+
+- `bloom_and(string bloom1, string bloom2)` - Returns the logical AND of two 
bloom filters
+  ```sql
+  SELECT bloom_and(bf1, bf2) FROM xxx;
+  ```
+
+- `bloom_contains(string bloom, string key)` or _FUNC_(string bloom, 
array&lt;string&gt; keys) - Returns true if the bloom filter contains all the 
given key(s). Returns false if key is null.
+  ```sql
+  WITH satisfied_movies as (
+    SELECT bloom(movieid) as movies
+    FROM (
+      SELECT movieid
+      FROM ratings
+      GROUP BY movieid
+      HAVING avg(rating) >= 4.0
+    ) t
+  )
+  SELECT
+    l.rating,
+    count(distinct l.userid) as cnt
+  FROM
+    ratings l 
+    CROSS JOIN satisfied_movies r
+  WHERE
+    bloom_contains(r.movies, l.movieid) -- includes false positive
+  GROUP BY 
+    l.rating;
+
+  l.rating        cnt
+  1       1296
+  2       2770
+  3       5008
+  4       5824
+  5       5925
+  ```
+
+- `bloom_contains_any(string bloom, string key)` or _FUNC_(string bloom, 
array&lt;string&gt; keys) - Returns true if the bloom filter contains any of the 
given key(s)
+  ```sql
+  WITH data1 as (
+    SELECT explode(array(1,2,3,4,5)) as id
+  ),
+  data2 as (
+    SELECT explode(array(1,3,5,6,8)) as id
+  ),
+  bloom as (
+    SELECT bloom(id) as bf
+    FROM data1
+  )
+  SELECT 
+    l.* 
+  FROM 
+    data2 l
+    CROSS JOIN bloom r
+  WHERE
+    bloom_contains_any(r.bf, array(l.id))
+  ```
+
+- `bloom_not(string bloom)` - Returns the logical NOT of a bloom filter
+  ```sql
+  SELECT bloom_not(bf) FROM xxx;
+  ```
+
+- `bloom_or(string bloom1, string bloom2)` - Returns the logical OR of two 
bloom filters
+  ```sql
+  SELECT bloom_or(bf1, bf2) FROM xxx;
+  ```
+
 # Ensemble learning
 
 - `argmin_kld(float mean, float covar)` - Returns mean or covar that minimize 
a KL-distance among distributions
@@ -446,7 +524,7 @@ This page describes a list of Hivemall functions. See also 
a [list of generic Hi
 
 - `hivemall_version()` - Returns the version of Hivemall
   ```sql
-  Usage: SELECT hivemall_version();
+  SELECT hivemall_version();
   ```
 
 - `lr_datagen(options string)` - Generates a logistic regression dataset

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/docs/gitbook/misc/generic_funcs.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/misc/generic_funcs.md 
b/docs/gitbook/misc/generic_funcs.md
index d33ab21..343a64a 100644
--- a/docs/gitbook/misc/generic_funcs.md
+++ b/docs/gitbook/misc/generic_funcs.md
@@ -24,88 +24,183 @@ This page describes a list of useful Hivemall generic 
functions. See also a [lis
 # Array
 
 - `array_append(array<T> arr, T elem)` - Append an element to the end of an 
array
+  ```sql
+  SELECT array_append(array(1,2),3);
+   1,2,3
+
+  SELECT array_append(array('a','b'),'c');
+   "a","b","c"
+  ```
 
 - `array_avg(array<number>)` - Returns an array&lt;double&gt; in which each 
element is the mean of a set of numbers
 
 - `array_concat(array<ANY> x1, array<ANY> x2, ..)` - Returns a concatenated 
array
   ```sql
-  select array_concat(array(1),array(2,3));
-  > [1,2,3]
+  SELECT array_concat(array(1),array(2,3));
+   [1,2,3]
   ```
 
 - `array_flatten(array<array<ANY>>)` - Returns an array with the elements 
flattened.
+  ```sql
+  SELECT array_flatten(array(array(1,2,3),array(4,5),array(6,7,8)));
+   [1,2,3,4,5,6,7,8]
+  ```
 
 - `array_intersect(array<ANY> x1, array<ANY> x2, ..)` - Returns an intersect 
of given arrays
   ```sql
-  select array_intersect(array(1,3,4),array(2,3,4),array(3,5));
-  > [3]
+  SELECT array_intersect(array(1,3,4),array(2,3,4),array(3,5));
+   [3]
   ```
 
 - `array_remove(array<int|text> original, int|text|array<int> target)` - 
Returns an array that the target is removed from the original array
   ```sql
-  select array_remove(array(1,null,3),array(null));
-  > [3]
+  SELECT array_remove(array(1,null,3),array(null));
+   [3]
 
-  select array_remove(array("aaa","bbb"),"bbb");
-  > ["aaa"]
+  SELECT array_remove(array("aaa","bbb"),"bbb");
+   ["aaa"]
   ```
 
 - `array_slice(array<ANY> values, int offset [, int length])` - Slices the 
given array by the given offset and length parameters.
   ```sql
-  select array_slice(array(1,2,3,4,5,6), 2,4);
-  > [3,4]
+  SELECT 
+    array_slice(array(1,2,3,4,5,6), 2,4),
+    array_slice(
+     array("zero", "one", "two", "three", "four", "five", "six", "seven", 
"eight", "nine", "ten"),
+     0, -- offset
+     2 -- length
+    ),
+    array_slice(
+     array("zero", "one", "two", "three", "four", "five", "six", "seven", 
"eight", "nine", "ten"),
+     6, -- offset
+     3 -- length
+    ),
+    array_slice(
+     array("zero", "one", "two", "three", "four", "five", "six", "seven", 
"eight", "nine", "ten"),
+     6, -- offset
+     10 -- length
+    ),
+    array_slice(
+     array("zero", "one", "two", "three", "four", "five", "six", "seven", 
"eight", "nine", "ten"),
+     6 -- offset
+    ),
+    array_slice(
+     array("zero", "one", "two", "three", "four", "five", "six", "seven", 
"eight", "nine", "ten"),
+     -3 -- offset
+    ),
+    array_slice(
+     array("zero", "one", "two", "three", "four", "five", "six", "seven", 
"eight", "nine", "ten"),
+     -3, -- offset
+     2 -- length
+    );
+
+   [3,4]
+   ["zero","one"] 
+   ["six","seven","eight"]
+   ["six","seven","eight","nine","ten"]
+   ["six","seven","eight","nine","ten"]
+   ["eight","nine","ten"]
+   ["eight","nine"]
   ```
 
 - `array_sum(array<number>)` - Returns an array&lt;double&gt; in which each 
element is summed up
 
+- `array_to_str(array arr [, string sep=','])` - Convert array to string using 
a separator
+  ```sql
+  SELECT array_to_str(array(1,2,3),'-');
+  1-2-3
+  ```
+
 - `array_union(array1, array2, ...)` - Returns the union of a set of arrays
+  ```sql
+  SELECT array_union(array(1,2),array(1,2));
+  [1,2]
+
+  SELECT array_union(array(1,2),array(2,3),array(2,5));
+  [1,2,3,5]
+  ```
 
 - `conditional_emit(array<boolean> conditions, array<primitive> features)` - 
Emit features of a row according to various conditions
+  ```sql
+  WITH input as (
+     select array(true, false, true) as conditions, array("one", "two", 
"three") as features
+     UNION ALL
+     select array(true, true, false), array("four", "five", "six")
+  )
+  SELECT
+    conditional_emit(
+       conditions, features
+    )
+  FROM 
+    input;
+   one
+   three
+   four
+   five
+  ```
 
 - `element_at(array<T> list, int pos)` - Returns an element at the given 
position
+  ```sql
+  SELECT element_at(array(1,2,3,4),0);
+   1
+
+  SELECT element_at(array(1,2,3,4),-2);
+   3
+  ```
+
+- `first_element(x)` - Returns the first element in an array
+  ```sql
+  SELECT first_element(array('a','b','c'));
+   a
 
-- `first_element(x)` - Returns the first element in an array 
+  SELECT first_element(array());
+   NULL
+  ```
 
 - `float_array(nDims)` - Returns an array&lt;float&gt; of nDims elements
 
 - `last_element(x)` - Return the last element in an array
+  ```sql
+  SELECT last_element(array('a','b','c'));
+   c
+  ```
 
 - `select_k_best(array<number> array, const array<number> importance, const 
int k)` - Returns selected top-k elements as array&lt;double&gt;
 
 - `sort_and_uniq_array(array<int>)` - Takes array&lt;int&gt; and returns a 
sorted array with duplicate elements eliminated
   ```sql
-  select sort_and_uniq_array(array(3,1,1,-2,10));
-  > [-2,1,3,10]
+  SELECT sort_and_uniq_array(array(3,1,1,-2,10));
+   [-2,1,3,10]
   ```
 
 - `subarray_endwith(array<int|text> original, int|text key)` - Returns an 
array that ends with the specified key
   ```sql
-  select subarray_endwith(array(1,2,3,4), 3);
-  > [1,2,3]
+  SELECT subarray_endwith(array(1,2,3,4), 3);
+   [1,2,3]
   ```
 
 - `subarray_startwith(array<int|text> original, int|text key)` - Returns an 
array that starts with the specified key
   ```sql
-  select subarray_startwith(array(1,2,3,4), 2);
-  > [2,3,4]
+  SELECT subarray_startwith(array(1,2,3,4), 2);
+   [2,3,4]
   ```
 
 - `to_string_array(array<ANY>)` - Returns an array of strings
 
 - `to_ordered_list(PRIMITIVE value [, PRIMITIVE key, const string options])` - 
Return list of values sorted by value itself or specific key
   ```sql
-  with t as (
-      select 5 as key, 'apple' as value
-      union all
-      select 3 as key, 'banana' as value
-      union all
-      select 4 as key, 'candy' as value
-      union all
-      select 2 as key, 'donut' as value
-      union all
-      select 3 as key, 'egg' as value
+  WITH t as (
+      SELECT 5 as key, 'apple' as value
+      UNION ALL
+      SELECT 3 as key, 'banana' as value
+      UNION ALL
+      SELECT 4 as key, 'candy' as value
+      UNION ALL
+      SELECT 2 as key, 'donut' as value
+      UNION ALL
+      SELECT 3 as key, 'egg' as value
   )
-  select                                             -- expected output
+  SELECT                                             -- expected output
       to_ordered_list(value, key, '-reverse'),       -- [apple, candy, 
(banana, egg | egg, banana), donut] (reverse order)
       to_ordered_list(value, key, '-k 2'),           -- [apple, candy] (top-k)
       to_ordered_list(value, key, '-k 100'),         -- [apple, candy, 
(banana, egg | egg, banana), dunut]
@@ -117,16 +212,230 @@ This page describes a list of useful Hivemall generic 
functions. See also a [lis
       to_ordered_list(value, '-k 2'),                -- [egg, donut] 
(alphabetically)
       to_ordered_list(key, '-k -2 -reverse'),        -- [5, 4] (top-2 keys)
       to_ordered_list(key)                           -- [2, 3, 3, 4, 5] 
(natural ordered keys)
-  from
+  FROM
       t
   ```
 
+# Bitset
+
+- `bits_collect(int|long x)` - Returns a bitset in array&lt;long&gt;
+
+- `bits_or(array<long> b1, array<long> b2, ..)` - Returns a logical OR of the given 
bitsets
+  ```sql
+  SELECT unbits(bits_or(to_bits(array(1,4)),to_bits(array(2,3))));
+   [1,2,3,4]
+  ```
+
+- `to_bits(int[] indexes)` - Returns a bitset representation of the given 
indexes in long[]
+  ```sql
+  SELECT to_bits(array(1,2,3,128));
+   [14,-9223372036854775808]
+  ```
+
+- `unbits(long[] bitset)` - Returns a long array of the given bitset 
representation
+  ```sql
+  SELECT unbits(to_bits(array(1,4,2,3)));
+   [1,2,3,4]
+  ```
+
+# Compression
+
+- `deflate(TEXT data [, const int compressionLevel])` - Returns a compressed 
BINARY object by using Deflater. The compression level must be in range [-1,9]
+  ```sql
+  SELECT base91(deflate('aaaaaaaaaaaaaaaabbbbccc'));
+   AA+=kaIM|WTt!+wbGAA
+  ```
+
+- `inflate(BINARY compressedData)` - Returns a decompressed STRING by using 
Inflater
+  ```sql
+  SELECT inflate(unbase91(base91(deflate('aaaaaaaaaaaaaaaabbbbccc'))));
+   aaaaaaaaaaaaaaaabbbbccc
+  ```
+
+# Datetime
+
+- `sessionize(long timeInSec, long thresholdInSec [, String subject])` - 
Returns a UUID string of a session.
+  ```sql
+  SELECT 
+    sessionize(time, 3600, ip_addr) as session_id, 
+    time, ip_addr
+  FROM (
+    SELECT time, ip_addr 
+    FROM weblog 
+    DISTRIBUTE BY ip_addr, time SORT BY ip_addr, time DESC
+  ) t1
+  ```
+
+# JSON
+
+- `from_json(string jsonString, const string returnTypes [, const 
array<string>|const string columnNames])` - Return Hive object.
+  ```sql
+  SELECT
+    from_json(
+      '{ "person" : { "name" : "makoto" , "age" : 37 } }',
+      'struct<name:string,age:int>', 
+      array('person')
+    ),
+    from_json(
+      '[0.1,1.1,2.2]',
+      'array<double>'
+    ),
+    from_json(to_json(
+      ARRAY(
+        NAMED_STRUCT("country", "japan", "city", "tokyo"), 
+        NAMED_STRUCT("country", "japan", "city", "osaka")
+      )
+    ),'array<struct<country:string,city:string>>'),
+    from_json(to_json(
+      ARRAY(
+        NAMED_STRUCT("country", "japan", "city", "tokyo"), 
+        NAMED_STRUCT("country", "japan", "city", "osaka")
+      ),
+      array('city')
+    ), 'array<struct<country:string,city:string>>'),
+    from_json(to_json(
+      ARRAY(
+        NAMED_STRUCT("country", "japan", "city", "tokyo"), 
+        NAMED_STRUCT("country", "japan", "city", "osaka")
+      )
+    ),'array<struct<city:string>>');
+  ```
+
+  ```
+   {"name":"makoto","age":37}
+   [0.1,1.1,2.2]
+   [{"country":"japan","city":"tokyo"},{"country":"japan","city":"osaka"}]
+   [{"country":"japan","city":"tokyo"},{"country":"japan","city":"osaka"}]
+   [{"city":"tokyo"},{"city":"osaka"}]
+  ```
+
+- `to_json(ANY object [, const array<string>|const string columnNames])` - 
Returns Json string
+  ```sql
+  SELECT 
+    NAMED_STRUCT("Name", "John", "age", 31),
+    to_json(
+       NAMED_STRUCT("Name", "John", "age", 31)
+    ),
+    to_json(
+       NAMED_STRUCT("Name", "John", "age", 31),
+       array('Name', 'age')
+    ),
+    to_json(
+       NAMED_STRUCT("Name", "John", "age", 31),
+       array('name', 'age')
+    ),
+    to_json(
+       NAMED_STRUCT("Name", "John", "age", 31),
+       array('age')
+    ),
+    to_json(
+       NAMED_STRUCT("Name", "John", "age", 31),
+       array()
+    ),
+    to_json(
+       null,
+       array()
+    ),
+    to_json(
+      struct("123", "456", 789, array(314,007)),
+      array('ti','si','i','bi')
+    ),
+    to_json(
+      struct("123", "456", 789, array(314,007)),
+      'ti,si,i,bi'
+    ),
+    to_json(
+      struct("123", "456", 789, array(314,007))
+    ),
+    to_json(
+      NAMED_STRUCT("country", "japan", "city", "tokyo")
+    ),
+    to_json(
+      NAMED_STRUCT("country", "japan", "city", "tokyo"), 
+      array('city')
+    ),
+    to_json(
+      ARRAY(
+        NAMED_STRUCT("country", "japan", "city", "tokyo"), 
+        NAMED_STRUCT("country", "japan", "city", "osaka")
+      )
+    ),
+    to_json(
+      ARRAY(
+        NAMED_STRUCT("country", "japan", "city", "tokyo"), 
+        NAMED_STRUCT("country", "japan", "city", "osaka")
+      ),
+      array('city')
+    );
+  ```
+
+  ```
+   {"name":"John","age":31}
+   {"name":"John","age":31}
+   {"Name":"John","age":31}
+   {"name":"John","age":31}
+   {"age":31}
+   {}
+   NULL
+   {"ti":"123","si":"456","i":789,"bi":[314,7]}
+   {"ti":"123","si":"456","i":789,"bi":[314,7]}
+   {"col1":"123","col2":"456","col3":789,"col4":[314,7]}
+   {"country":"japan","city":"tokyo"}
+   {"city":"tokyo"}
+   [{"country":"japan","city":"tokyo"},{"country":"japan","city":"osaka"}]
+   [{"country":"japan","city":"tokyo"},{"country":"japan","city":"osaka"}]
+  ```
+
 # Map
 
+- `map_exclude_keys(Map<K,V> map, array<K> filteringKeys)` - Returns the 
filtered entries of a map not having specified keys
+  ```sql
+  SELECT map_exclude_keys(map(1,'one',2,'two',3,'three'),array(2,3));
+  {1:"one"}
+  ```
+
 - `map_get_sum(map<int,float> src, array<int> keys)` - Returns sum of values 
that are retrieved by keys
 
+- `map_include_keys(Map<K,V> map, array<K> filteringKeys)` - Returns the 
filtered entries of a map having specified keys
+  ```sql
+  SELECT map_include_keys(map(1,'one',2,'two',3,'three'),array(2,3));
+  {2:"two",3:"three"}
+  ```
+
+- `map_index(a, n)` - Returns the n-th element of the given array
+  ```sql
+  WITH tmp as (
+    SELECT "one" as key
+    UNION ALL
+    SELECT "two" as key
+  )
+  SELECT map_index(map("one",1,"two",2),key)
+  FROM tmp;
+
+  1
+  2
+  ```
+
+- `map_key_values(map)` - Returns an array of key-value pairs.
+  ```sql
+  SELECT map_key_values(map("one",1,"two",2));
+
+  [{"key":"one","value":1},{"key":"two","value":2}]
+  ```
+
 - `map_tail_n(map SRC, int N)` - Returns the last N elements from a sorted 
array of SRC
 
+- `merge_maps(x)` - Returns a map which contains the union of an aggregation 
of maps. Note that an existing value of a key can be replaced with the other 
duplicate key entry.
+  ```sql
+  SELECT 
+    merge_maps(m) 
+  FROM (
+    SELECT map('A',10,'B',20,'C',30) as m 
+    UNION ALL 
+    SELECT map('A',10,'B',20,'C',30)
+  ) t
+  ```
+
 - `to_map(key, value)` - Convert two aggregated columns into a key-value map
 
 - `to_ordered_map(key, value [, const int k|const boolean 
reverseOrder=false])` - Convert two aggregated columns into an ordered 
key-value map
@@ -152,42 +461,6 @@ This page describes a list of useful Hivemall generic 
functions. See also a [lis
   from t
   ```
 
-# Bitset
-
-- `bits_collect(int|long x)` - Returns a bitset in array&lt;long&gt;
-
-- `bits_or(array<long> b1, array<long> b2, ..)` - Returns a logical OR given 
bitsets
-  ```sql
-  select unbits(bits_or(to_bits(array(1,4)),to_bits(array(2,3))));
-  > [1,2,3,4]
-  ```
-
-- `to_bits(int[] indexes)` - Returns an bitset representation if the given 
indexes in long[]
-  ```sql
-  select to_bits(array(1,2,3,128));
-  > [14,-9223372036854775808]
-  ```
-
-- `unbits(long[] bitset)` - Returns an long array of the give bitset 
representation
-  ```sql
-  select unbits(to_bits(array(1,4,2,3)));
-  > [1,2,3,4]
-  ```
-
-# Compression
-
-- `deflate(TEXT data [, const int compressionLevel])` - Returns a compressed 
BINARY object by using Deflater. The compression level must be in range [-1,9]
-  ```sql
-  select base91(deflate('aaaaaaaaaaaaaaaabbbbccc'));
-  > AA+=kaIM|WTt!+wbGAA
-  ```
-
-- `inflate(BINARY compressedData)` - Returns a decompressed STRING by using 
Inflater
-  ```sql
-  select inflate(unbase91(base91(deflate('aaaaaaaaaaaaaaaabbbbccc'))));
-  > aaaaaaaaaaaaaaaabbbbccc
-  ```
-
 # MapReduce
 
 - `distcache_gets(filepath, key, default_value [, parseKey])` - Returns 
map&lt;key_type, value_type&gt;|value_type
@@ -198,9 +471,9 @@ This page describes a list of useful Hivemall generic 
functions. See also a [lis
 
 - `rowid()` - Returns a generated row id of a form {TASK_ID}-{SEQUENCE_NUMBER}
 
-- `rownum()` - Returns a generated row number in long
-  ```
-  returns sprintf(`%d%04d`,sequence,taskId) as long
+- `rownum()` - Returns a generated row number 
`sprintf(`%d%04d`,sequence,taskId)` in long
+  ```sql
+  SELECT rownum() as rownum, xxx from ...
   ```
 
 - `taskid()` - Returns the value of mapred.task.partition
@@ -215,47 +488,91 @@ This page describes a list of useful Hivemall generic 
functions. See also a [lis
 
 - `transpose_and_dot(array<number> matrix0_row, array<number> matrix1_row)` - 
Returns dot(matrix0.T, matrix1) as array&lt;array&lt;double&gt;&gt;, shape = 
(matrix0.#cols, matrix1.#cols)
 
+# Sanity Checks
+
+- `assert(boolean condition)` or `assert(boolean condition, string errMsg)` - 
Throws HiveException if condition is not met
+  ```sql
+  SELECT count(1) FROM stock_price WHERE assert(price > 0.0);
+  SELECT count(1) FROM stock_price WHERE assert(price > 0.0, 'price MUST be 
more than 0.0')
+  ```
+
+- `raise_error()` or `raise_error(string msg)` - Throws an error
+  ```sql
+  SELECT product_id, price, raise_error('Found an invalid record') FROM xxx 
WHERE price < 0.0
+  ```
+
 # Text processing
 
 - `base91(BINARY bin)` - Convert the argument from binary to a BASE91 string
   ```sql
-  select base91(deflate('aaaaaaaaaaaaaaaabbbbccc'));
-  > AA+=kaIM|WTt!+wbGAA
+  SELECT base91(deflate('aaaaaaaaaaaaaaaabbbbccc'));
+   AA+=kaIM|WTt!+wbGAA
   ```
 
 - `is_stopword(string word)` - Returns whether English stopword or not
 
 - `normalize_unicode(string str [, string form])` - Transforms `str` with the 
specified normalization form. The `form` takes one of NFC (default), NFD, NFKC, 
or NFKD
   ```sql
-  select normalize_unicode('ï¾ï¾ï½¶ï½¸ï½¶ï¾','NFKC');
-  > ãã³ã«ã¯ã«ã
+  SELECT normalize_unicode('ï¾ï¾ï½¶ï½¸ï½¶ï¾','NFKC');
+   ãã³ã«ã¯ã«ã
 
-  select normalize_unicode('ã±ã§ã¦â¢','NFKC');
-  > (æ ª)ãã³ãã«III
+  SELECT normalize_unicode('ã±ã§ã¦â¢','NFKC');
+   (æ ª)ãã³ãã«III
   ```
 
 - `singularize(string word)` - Returns singular form of a given English word
   ```sql
-  select singularize(lower("Apples"));
+  SELECT singularize(lower("Apples"));
 
-  > "apple"
+   "apple"
   ```
 
-- `split_words(string query [, string regex])` - Returns an array&lt;text&gt; 
containing split strings
+- `split_words(string query [, string regex])` - Returns an array&lt;text&gt; 
containing split strings
 
 - `tokenize(string englishText [, boolean toLowerCase])` - Returns tokenized 
words in array&lt;string&gt;
 
 - `unbase91(string)` - Convert a BASE91 string to a binary
   ```sql
-  select inflate(unbase91(base91(deflate('aaaaaaaaaaaaaaaabbbbccc'))));
-  > aaaaaaaaaaaaaaaabbbbccc
+  SELECT inflate(unbase91(base91(deflate('aaaaaaaaaaaaaaaabbbbccc'))));
+   aaaaaaaaaaaaaaaabbbbccc
   ```
 
 - `word_ngrams(array<string> words, int minSize, int maxSize)` - Returns list 
of n-grams for given words, where `minSize &lt;= n &lt;= maxSize`
   ```sql
-  select word_ngrams(tokenize('Machine learning is fun!', true), 1, 2);
+  SELECT word_ngrams(tokenize('Machine learning is fun!', true), 1, 2);
 
-  > ["machine","machine learning","learning","learning is","is","is fun","fun"]
+   ["machine","machine learning","learning","learning is","is","is fun","fun"]
+  ```
+
+# Timeseries
+
+- `moving_avg(NUMBER value, const int windowSize)` - Returns moving average of 
a time series using a given window
+  ```sql
+  SELECT moving_avg(x, 3) FROM (SELECT 
explode(array(1.0,2.0,3.0,4.0,5.0,6.0,7.0)) as x) series;
+   1.0
+   1.5
+   2.0
+   3.0
+   4.0
+   5.0
+   6.0
+  ```
+
+# Vector
+
+- `vector_add(array<NUMBER> x, array<NUMBER> y)` - Performs vector ADD 
operation.
+  ```sql
+  SELECT vector_add(array(1.0,2.0,3.0), array(2, 3, 4));
+  [3.0,5.0,7.0]
+  ```
+
+- `vector_dot(array<NUMBER> x, array<NUMBER> y)` - Performs vector dot product.
+  ```sql
+  SELECT vector_dot(array(1.0,2.0,3.0),array(2.0,3.0,4.0));
+  20
+
+  SELECT vector_dot(array(1.0,2.0,3.0),2);
+  [2.0,4.0,6.0]
   ```
 
 # Others
@@ -264,25 +581,51 @@ This page describes a list of useful Hivemall generic 
functions. See also a [lis
 
 - `each_top_k(int K, Object group, double cmpKey, *)` - Returns top-K values 
(or tail-K values when k is less than 0)
 
-- `generate_series(const int|bigint start, const int|bigint end)` - Generate a 
series of values, from start to end. A similar function to PostgreSQL's 
`generate_serics`. 
http://www.postgresql.org/docs/current/static/functions-srf.html
+- `generate_series(const int|bigint start, const int|bigint end)` - Generate a 
series of values, from start to end. A similar function to PostgreSQL's 
[generate_series](http://www.postgresql.org/docs/current/static/functions-srf.html)
   ```sql
-  select generate_series(1,9);
+  SELECT generate_series(2,4);
 
-  1
-  2
-  3
-  4
-  5
-  6
-  7
-  8
-  9
+   2
+   3
+   4
+
+  SELECT generate_series(5,1,-2);
+
+   5
+   3
+   1
+
+  SELECT generate_series(4,3);
+
+   (no return)
+
+  SELECT date_add(current_date(),value),value from (SELECT 
generate_series(1,3)) t;
+
+   2018-04-21      1
+   2018-04-22      2
+   2018-04-23      3
+
+  WITH input as (
+   SELECT 1 as c1, 10 as c2, 3 as step
+   UNION ALL
+   SELECT 10, 2, -3
+  )
+  SELECT generate_series(c1, c2, step) as series
+  FROM input;
+
+   1
+   4
+   7
+   10
+   10
+   7
+   4
   ```
 
 - `try_cast(ANY src, const string typeName)` - Explicitly cast a value as a 
type. Returns null if cast fails.
   ```sql
-  Usage: select try_cast(array(1.0,2.0,3.0), 'array<string>')
-       select try_cast(map('A',10,'B',20,'C',30), 'map<string,double>')
+  SELECT try_cast(array(1.0,2.0,3.0), 'array<string>')
+  SELECT try_cast(map('A',10,'B',20,'C',30), 'map<string,double>')
   ```
 
 - `x_rank(KEY)` - Generates a pseudo sequence number starting from 1 for each 
key

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 5a78bd1..aa682c6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -196,6 +196,18 @@
                        </roles>
                        <timezone>+9</timezone>
                </developer>
+               <developer>
+                       <id>jbanks</id>
+                       <name>Jerome Banks</name>
+                       <email>jbanks[at]apache.org</email>
+                       <url>https://github.com/jeromebanks/</url>
+                       <organization>Jumpshot Inc.</organization>
+                       
<organizationUrl>https://www.jumpshot.com/</organizationUrl>
+                       <roles>
+                               <role>Committer</role>
+                       </roles>
+                       <timezone>-8</timezone>
+               </developer>
                <!-- Project mentors -->
                <developer>
                        <id>rvs</id>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/resources/ddl/define-all-as-permanent.hive
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all-as-permanent.hive 
b/resources/ddl/define-all-as-permanent.hive
index e7da8e3..5c257c5 100644
--- a/resources/ddl/define-all-as-permanent.hive
+++ b/resources/ddl/define-all-as-permanent.hive
@@ -435,6 +435,30 @@ CREATE FUNCTION array_intersect as 
'hivemall.tools.array.ArrayIntersectUDF' USIN
 DROP FUNCTION IF EXISTS select_k_best;
 CREATE FUNCTION select_k_best as 'hivemall.tools.array.SelectKBestUDF' USING 
JAR '${hivemall_jar}';
 
+DROP FUNCTION IF EXISTS array_append;
+CREATE FUNCTION array_append as 'hivemall.tools.array.ArrayAppendUDF' USING 
JAR '${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS element_at;
+CREATE FUNCTION element_at as 'hivemall.tools.array.ArrayElementAtUDF' USING 
JAR '${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS array_union;
+CREATE FUNCTION array_union as 'hivemall.tools.array.ArrayUnionUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS first_element;
+CREATE FUNCTION first_element as 'hivemall.tools.array.FirstElementUDF' USING 
JAR '${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS last_element;
+CREATE FUNCTION last_element as 'hivemall.tools.array.LastElementUDF' USING 
JAR '${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS array_flatten;
+CREATE FUNCTION array_flatten as 'hivemall.tools.array.ArrayFlattenUDF' USING 
JAR '${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS array_to_str;
+CREATE FUNCTION array_to_str as 'hivemall.tools.array.ArrayToStrUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS conditional_emit;
+CREATE FUNCTION conditional_emit as 'hivemall.tools.array.ConditionalEmitUDTF' 
USING JAR '${hivemall_jar}';
+
 -----------------------------
 -- bit operation functions --
 -----------------------------
@@ -477,6 +501,18 @@ CREATE FUNCTION to_map as 'hivemall.tools.map.UDAFToMap' 
USING JAR '${hivemall_j
 DROP FUNCTION IF EXISTS to_ordered_map;
 CREATE FUNCTION to_ordered_map as 'hivemall.tools.map.UDAFToOrderedMap' USING 
JAR '${hivemall_jar}';
 
+DROP FUNCTION IF EXISTS map_include_keys;
+CREATE FUNCTION map_include_keys as 'hivemall.tools.map.MapIncludeKeysUDF' 
USING JAR '${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS map_exclude_keys;
+CREATE FUNCTION map_exclude_keys as 'hivemall.tools.map.MapExcludeKeysUDF' 
USING JAR '${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS map_index;
+CREATE FUNCTION map_index as 'hivemall.tools.map.MapIndexUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS map_key_values;
+CREATE FUNCTION map_key_values as 'hivemall.tools.map.MapKeyValuesUDF' USING 
JAR '${hivemall_jar}';
+
 ---------------------
 -- list functions --
 ---------------------
@@ -494,13 +530,19 @@ CREATE FUNCTION sigmoid as 
'hivemall.tools.math.SigmoidGenericUDF' USING JAR '${
 DROP FUNCTION IF EXISTS l2_norm;
 CREATE FUNCTION l2_norm as 'hivemall.tools.math.L2NormUDAF' USING JAR 
'${hivemall_jar}';
 
-----------------------
--- Matrix functions --
-----------------------
+-----------------------------
+-- Matrix/Vector functions --
+-----------------------------
 
 DROP FUNCTION IF EXISTS transpose_and_dot;
 CREATE FUNCTION transpose_and_dot as 
'hivemall.tools.matrix.TransposeAndDotUDAF' USING JAR '${hivemall_jar}';
 
+DROP FUNCTION IF EXISTS vector_add;
+CREATE FUNCTION vector_add as 'hivemall.tools.vector.VectorAddUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS vector_dot;
+CREATE FUNCTION vector_dot as 'hivemall.tools.vector.VectorDotUDF' USING JAR 
'${hivemall_jar}';
+
 ----------------------
 -- mapred functions --
 ----------------------
@@ -524,6 +566,26 @@ DROP FUNCTION IF EXISTS jobconf_gets;
 CREATE FUNCTION jobconf_gets as 'hivemall.tools.mapred.JobConfGetsUDF' USING 
JAR '${hivemall_jar}';
 
 --------------------
+-- JSON functions --
+--------------------
+
+DROP FUNCTION IF EXISTS to_json;
+CREATE FUNCTION to_json as 'hivemall.tools.json.ToJsonUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS from_json;
+CREATE FUNCTION from_json as 'hivemall.tools.json.FromJsonUDF' USING JAR 
'${hivemall_jar}';
+
+----------------------------
+-- Sanity Check functions --
+----------------------------
+
+DROP FUNCTION IF EXISTS assert;
+CREATE FUNCTION assert as 'hivemall.tools.sanity.AssertUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS raise_error;
+CREATE FUNCTION raise_error as 'hivemall.tools.sanity.RaiseErrorUDF' USING JAR 
'${hivemall_jar}';
+
+--------------------
 -- misc functions --
 --------------------
 
@@ -539,6 +601,15 @@ CREATE FUNCTION x_rank as 'hivemall.tools.RankSequenceUDF' 
USING JAR '${hivemall
 DROP FUNCTION IF EXISTS each_top_k;
 CREATE FUNCTION each_top_k as 'hivemall.tools.EachTopKUDTF' USING JAR 
'${hivemall_jar}';
 
+DROP FUNCTION IF EXISTS try_cast;
+CREATE FUNCTION try_cast as 'hivemall.tools.TryCastUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS sessionize;
+CREATE FUNCTION sessionize as 'hivemall.tools.datetime.SessionizeUDF' USING 
JAR '${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS moving_avg;
+CREATE FUNCTION moving_avg as 'hivemall.tools.timeseries.MovingAverageUDTF' 
USING JAR '${hivemall_jar}';
+
 -------------------------------
 -- Text processing functions --
 -------------------------------
@@ -749,6 +820,28 @@ CREATE FUNCTION train_slim as 
'hivemall.recommend.SlimUDTF' USING JAR '${hivemal
 DROP FUNCTION IF EXISTS approx_count_distinct;
 CREATE FUNCTION approx_count_distinct as 
'hivemall.sketch.hll.ApproxCountDistinctUDAF' USING JAR '${hivemall_jar}';
 
+------------------
+-- Bloom Filter --
+------------------
+
+DROP FUNCTION IF EXISTS bloom;
+CREATE FUNCTION bloom as 'hivemall.sketch.bloom.BloomFilterUDAF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS bloom_and;
+CREATE FUNCTION bloom_and as 'hivemall.sketch.bloom.BloomAndUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS bloom_contains;
+CREATE FUNCTION bloom_contains as 'hivemall.sketch.bloom.BloomContainsUDF' 
USING JAR '${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS bloom_not;
+CREATE FUNCTION bloom_not as 'hivemall.sketch.bloom.BloomNotUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS bloom_or;
+CREATE FUNCTION bloom_or as 'hivemall.sketch.bloom.BloomOrUDF' USING JAR 
'${hivemall_jar}';
+
+DROP FUNCTION IF EXISTS bloom_contains_any;
+CREATE FUNCTION bloom_contains_any as 
'hivemall.sketch.bloom.BloomContainsAnyUDF' USING JAR '${hivemall_jar}';
+
 ------------------------------
 -- XGBoost related features --
 ------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/resources/ddl/define-all.hive
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive
index 9228ce9..fbb3ed2 100644
--- a/resources/ddl/define-all.hive
+++ b/resources/ddl/define-all.hive
@@ -427,6 +427,30 @@ create temporary function array_intersect as 
'hivemall.tools.array.ArrayIntersec
 drop temporary function if exists select_k_best;
 create temporary function select_k_best as 
'hivemall.tools.array.SelectKBestUDF';
 
+drop temporary function if exists array_append;
+create temporary function array_append as 
'hivemall.tools.array.ArrayAppendUDF';
+
+drop temporary function if exists element_at;
+create temporary function element_at as 
'hivemall.tools.array.ArrayElementAtUDF';
+
+drop temporary function if exists array_union;
+create temporary function array_union as 'hivemall.tools.array.ArrayUnionUDF';
+
+drop temporary function if exists first_element;
+create temporary function first_element as 
'hivemall.tools.array.FirstElementUDF';
+
+drop temporary function if exists last_element;
+create temporary function last_element as 
'hivemall.tools.array.LastElementUDF';
+
+drop temporary function if exists array_flatten;
+create temporary function array_flatten as 
'hivemall.tools.array.ArrayFlattenUDF';
+
+drop temporary function if exists array_to_str;
+create temporary function array_to_str as 'hivemall.tools.array.ArrayToStrUDF';
+
+drop temporary function if exists conditional_emit;
+create temporary function conditional_emit as 
'hivemall.tools.array.ConditionalEmitUDTF';
+
 -----------------------------
 -- bit operation functions --
 -----------------------------
@@ -469,6 +493,18 @@ create temporary function to_map as 
'hivemall.tools.map.UDAFToMap';
 drop temporary function if exists to_ordered_map;
 create temporary function to_ordered_map as 
'hivemall.tools.map.UDAFToOrderedMap';
 
+drop temporary function if exists map_include_keys;
+create temporary function map_include_keys as 
'hivemall.tools.map.MapIncludeKeysUDF';
+
+drop temporary function if exists map_exclude_keys;
+create temporary function map_exclude_keys as 
'hivemall.tools.map.MapExcludeKeysUDF';
+
+drop temporary function if exists map_index;
+create temporary function map_index as 'hivemall.tools.map.MapIndexUDF';
+
+drop temporary function if exists map_key_values;
+create temporary function map_key_values as 
'hivemall.tools.map.MapKeyValuesUDF';
+
 ---------------------
 -- list functions --
 ---------------------
@@ -486,13 +522,19 @@ create temporary function sigmoid as 
'hivemall.tools.math.SigmoidGenericUDF';
 drop temporary function if exists l2_norm;
 create temporary function l2_norm as 'hivemall.tools.math.L2NormUDAF';
 
-----------------------
--- Matrix functions --
-----------------------
+-----------------------------
+-- Matrix/Vector functions --
+-----------------------------
 
 drop temporary function if exists transpose_and_dot;
 create temporary function transpose_and_dot as 
'hivemall.tools.matrix.TransposeAndDotUDAF';
 
+drop temporary function if exists vector_add;
+create temporary function vector_add as 'hivemall.tools.vector.VectorAddUDF';
+
+drop temporary function if exists vector_dot;
+create temporary function vector_dot as 'hivemall.tools.vector.VectorDotUDF';
+
 ----------------------
 -- mapred functions --
 ----------------------
@@ -516,6 +558,26 @@ drop temporary function if exists jobconf_gets;
 create temporary function jobconf_gets as 
'hivemall.tools.mapred.JobConfGetsUDF';
 
 --------------------
+-- JSON functions --
+--------------------
+
+drop temporary function if exists to_json;
+create temporary function to_json as 'hivemall.tools.json.ToJsonUDF';
+
+drop temporary function if exists from_json;
+create temporary function from_json as 'hivemall.tools.json.FromJsonUDF';
+
+----------------------------
+-- Sanity Check functions --
+----------------------------
+
+drop temporary function if exists assert;
+create temporary function assert as 'hivemall.tools.sanity.AssertUDF';
+
+drop temporary function if exists raise_error;
+create temporary function raise_error as 'hivemall.tools.sanity.RaiseErrorUDF';
+
+--------------------
 -- misc functions --
 --------------------
 
@@ -531,6 +593,15 @@ create temporary function x_rank as 
'hivemall.tools.RankSequenceUDF';
 drop temporary function if exists each_top_k;
 create temporary function each_top_k as 'hivemall.tools.EachTopKUDTF';
 
+drop temporary function if exists try_cast;
+create temporary function try_cast as 'hivemall.tools.TryCastUDF';
+
+drop temporary function if exists sessionize;
+create temporary function sessionize as 
'hivemall.tools.datetime.SessionizeUDF';
+
+drop temporary function if exists moving_avg;
+create temporary function moving_avg as 
'hivemall.tools.timeseries.MovingAverageUDTF';
+
 -------------------------------
 -- Text processing functions --
 -------------------------------
@@ -741,6 +812,28 @@ create temporary function train_slim as 
'hivemall.recommend.SlimUDTF';
 drop temporary function if exists approx_count_distinct;
 create temporary function approx_count_distinct as 
'hivemall.sketch.hll.ApproxCountDistinctUDAF';
 
+------------------
+-- Bloom Filter --
+------------------
+
+drop temporary function if exists bloom;
+create temporary function bloom as 'hivemall.sketch.bloom.BloomFilterUDAF';
+
+drop temporary function if exists bloom_and;
+create temporary function bloom_and as 'hivemall.sketch.bloom.BloomAndUDF';
+
+drop temporary function if exists bloom_contains;
+create temporary function bloom_contains as 
'hivemall.sketch.bloom.BloomContainsUDF';
+
+drop temporary function if exists bloom_not;
+create temporary function bloom_not as 'hivemall.sketch.bloom.BloomNotUDF';
+
+drop temporary function if exists bloom_or;
+create temporary function bloom_or as 'hivemall.sketch.bloom.BloomOrUDF';
+
+drop temporary function if exists bloom_contains_any;
+create temporary function bloom_contains_any as 
'hivemall.sketch.bloom.BloomContainsAnyUDF';
+
 
--------------------------------------------------------------------------------------------------
 -- macros available from hive 0.12.0
 -- see https://issues.apache.org/jira/browse/HIVE-2655

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/resources/ddl/define-all.spark
----------------------------------------------------------------------
diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark
index 3764ca2..e78a966 100644
--- a/resources/ddl/define-all.spark
+++ b/resources/ddl/define-all.spark
@@ -425,6 +425,30 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION array_intersect 
AS 'hivemall.tools.arr
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS select_k_best")
 sqlContext.sql("CREATE TEMPORARY FUNCTION select_k_best AS 
'hivemall.tools.array.SelectKBestUDF'")
 
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS array_append")
+sqlContext.sql("CREATE TEMPORARY FUNCTION array_append AS 
'hivemall.tools.array.ArrayAppendUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS element_at")
+sqlContext.sql("CREATE TEMPORARY FUNCTION element_at AS 
'hivemall.tools.array.ArrayElementAtUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS array_union")
+sqlContext.sql("CREATE TEMPORARY FUNCTION array_union AS 
'hivemall.tools.array.ArrayUnionUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS first_element")
+sqlContext.sql("CREATE TEMPORARY FUNCTION first_element AS 
'hivemall.tools.array.FirstElementUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS last_element")
+sqlContext.sql("CREATE TEMPORARY FUNCTION last_element AS 
'hivemall.tools.array.LastElementUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS array_flatten")
+sqlContext.sql("CREATE TEMPORARY FUNCTION array_flatten AS 
'hivemall.tools.array.ArrayFlattenUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS array_to_str")
+sqlContext.sql("CREATE TEMPORARY FUNCTION array_to_str AS 
'hivemall.tools.array.ArrayToStrUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS conditional_emit")
+sqlContext.sql("CREATE TEMPORARY FUNCTION conditional_emit AS 
'hivemall.tools.array.ConditionalEmitUDTF'")
+
 /**
  * Bit operation functions
  */
@@ -467,6 +491,18 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION to_map AS 
'hivemall.tools.map.UDAFToMa
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS to_ordered_map")
 sqlContext.sql("CREATE TEMPORARY FUNCTION to_ordered_map AS 
'hivemall.tools.map.UDAFToOrderedMap'")
 
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS map_include_keys")
+sqlContext.sql("CREATE TEMPORARY FUNCTION map_include_keys AS 
'hivemall.tools.map.MapIncludeKeysUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS map_exclude_keys")
+sqlContext.sql("CREATE TEMPORARY FUNCTION map_exclude_keys AS 
'hivemall.tools.map.MapExcludeKeysUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS map_index")
+sqlContext.sql("CREATE TEMPORARY FUNCTION map_index AS 
'hivemall.tools.map.MapIndexUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS map_key_values")
+sqlContext.sql("CREATE TEMPORARY FUNCTION map_key_values AS 
'hivemall.tools.map.MapKeyValuesUDF'")
+
 /**
  * List functions
  */
@@ -485,12 +521,18 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS 
l2_norm")
 sqlContext.sql("CREATE TEMPORARY FUNCTION l2_norm AS 
'hivemall.tools.math.L2NormUDAF'")
 
 /**
- * Matrix functions
+ * Matrix/Vector functions
  */
 
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS transpose_and_dot")
 sqlContext.sql("CREATE TEMPORARY FUNCTION transpose_and_dot AS 
'hivemall.tools.matrix.TransposeAndDotUDAF'")
 
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS vector_add")
+sqlContext.sql("CREATE TEMPORARY FUNCTION vector_add AS 
'hivemall.tools.vector.VectorAddUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS vector_dot")
+sqlContext.sql("CREATE TEMPORARY FUNCTION vector_dot AS 
'hivemall.tools.vector.VectorDotUDF'")
+
 /**
  * MAPRED functions
  */
@@ -499,6 +541,26 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS rowid")
 sqlContext.sql("CREATE TEMPORARY FUNCTION rowid AS 
'hivemall.tools.mapred.RowIdUDFWrapper'")
 
 /**
+ * JSON functions
+ */
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS to_json")
+sqlContext.sql("CREATE TEMPORARY FUNCTION to_json AS 
'hivemall.tools.json.ToJsonUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS from_json")
+sqlContext.sql("CREATE TEMPORARY FUNCTION from_json AS 
'hivemall.tools.json.FromJsonUDF'")
+
+/**
+ * Sanity Check functions
+ */
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS assert")
+sqlContext.sql("CREATE TEMPORARY FUNCTION assert AS 
'hivemall.tools.sanity.AssertUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS raise_error")
+sqlContext.sql("CREATE TEMPORARY FUNCTION raise_error AS 
'hivemall.tools.sanity.RaiseErrorUDF'")
+
+/**
  * MISC functions
  */
 
@@ -514,6 +576,15 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION x_rank AS 
'hivemall.tools.RankSequence
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS each_top_k")
 sqlContext.sql("CREATE TEMPORARY FUNCTION each_top_k AS 
'hivemall.tools.EachTopKUDTF'")
 
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS try_cast")
+sqlContext.sql("CREATE TEMPORARY FUNCTION try_cast AS 
'hivemall.tools.TryCastUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS sessionize")
+sqlContext.sql("CREATE TEMPORARY FUNCTION sessionize AS 
'hivemall.tools.datetime.SessionizeUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS moving_avg")
+sqlContext.sql("CREATE TEMPORARY FUNCTION moving_avg AS 
'hivemall.tools.timeseries.MovingAverageUDTF'")
+
 /**
  * Text processing functions
  */
@@ -726,3 +797,25 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION train_slim AS 
'hivemall.recommend.Slim
 sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS approx_count_distinct")
 sqlContext.sql("CREATE TEMPORARY FUNCTION approx_count_distinct AS 
'hivemall.sketch.hll.ApproxCountDistinctUDAF'")
 
+
+/**
+ * Bloom Filter
+ */
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS bloom")
+sqlContext.sql("CREATE TEMPORARY FUNCTION bloom AS 
'hivemall.sketch.bloom.BloomFilterUDAF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS bloom_and")
+sqlContext.sql("CREATE TEMPORARY FUNCTION bloom_and AS 
'hivemall.sketch.bloom.BloomAndUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS bloom_contains")
+sqlContext.sql("CREATE TEMPORARY FUNCTION bloom_contains AS 
'hivemall.sketch.bloom.BloomContainsUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS bloom_not")
+sqlContext.sql("CREATE TEMPORARY FUNCTION bloom_not AS 
'hivemall.sketch.bloom.BloomNotUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS bloom_or")
+sqlContext.sql("CREATE TEMPORARY FUNCTION bloom_or AS 
'hivemall.sketch.bloom.BloomOrUDF'")
+
+sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS bloom_contains_any")
+sqlContext.sql("CREATE TEMPORARY FUNCTION bloom_contains_any AS 
'hivemall.sketch.bloom.BloomContainsAnyUDF'")

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/resources/ddl/define-udfs.td.hql
----------------------------------------------------------------------
diff --git a/resources/ddl/define-udfs.td.hql b/resources/ddl/define-udfs.td.hql
index b106eda..2352390 100644
--- a/resources/ddl/define-udfs.td.hql
+++ b/resources/ddl/define-udfs.td.hql
@@ -155,7 +155,7 @@ create temporary function train_randomforest_regr as 
'hivemall.smile.regression.
 create temporary function tree_predict as 
'hivemall.smile.tools.TreePredictUDF';
 create temporary function rf_ensemble as 
'hivemall.smile.tools.RandomForestEnsembleUDAF';
 create temporary function guess_attribute_types as 
'hivemall.smile.tools.GuessAttributesUDF';
--- since Hivemall v0.5
+-- since Hivemall v0.5.0
 create temporary function changefinder as 'hivemall.anomaly.ChangeFinderUDF';
 create temporary function sst as 
'hivemall.anomaly.SingularSpectrumTransformUDF';
 create temporary function train_lda as 'hivemall.topicmodel.LDAUDTF';
@@ -183,7 +183,35 @@ create temporary function train_slim as 
'hivemall.recommend.SlimUDTF';
 create temporary function hitrate as 'hivemall.evaluation.HitRateUDAF';
 create temporary function word_ngrams as 'hivemall.tools.text.WordNgramsUDF';
 create temporary function approx_count_distinct as 
'hivemall.sketch.hll.ApproxCountDistinctUDAF';
+-- since Hivemall v0.5.2
 create temporary function array_slice as 'hivemall.tools.array.ArraySliceUDF';
+create temporary function try_cast as 'hivemall.tools.TryCastUDF';
+create temporary function array_append as 
'hivemall.tools.array.ArrayAppendUDF';
+create temporary function element_at as 
'hivemall.tools.array.ArrayElementAtUDF';
+create temporary function array_union as 'hivemall.tools.array.ArrayUnionUDF';
+create temporary function first_element as 
'hivemall.tools.array.FirstElementUDF';
+create temporary function last_element as 
'hivemall.tools.array.LastElementUDF';
+create temporary function array_flatten as 
'hivemall.tools.array.ArrayFlattenUDF';
+create temporary function map_include_keys as 
'hivemall.tools.map.MapIncludeKeysUDF';
+create temporary function map_exclude_keys as 
'hivemall.tools.map.MapExcludeKeysUDF';
+create temporary function array_to_str as 'hivemall.tools.array.ArrayToStrUDF';
+create temporary function map_index as 'hivemall.tools.map.MapIndexUDF';
+create temporary function map_key_values as 
'hivemall.tools.map.MapKeyValuesUDF';
+create temporary function sessionize as 
'hivemall.tools.datetime.SessionizeUDF';
+create temporary function to_json as 'hivemall.tools.json.ToJsonUDF';
+create temporary function from_json as 'hivemall.tools.json.FromJsonUDF';
+create temporary function assert as 'hivemall.tools.sanity.AssertUDF';
+create temporary function raise_error as 'hivemall.tools.sanity.RaiseErrorUDF';
+create temporary function moving_avg as 
'hivemall.tools.timeseries.MovingAverageUDTF';
+create temporary function vector_add as 'hivemall.tools.vector.VectorAddUDF';
+create temporary function vector_dot as 'hivemall.tools.vector.VectorDotUDF';
+create temporary function bloom as 'hivemall.sketch.bloom.BloomFilterUDAF';
+create temporary function bloom_and as 'hivemall.sketch.bloom.BloomAndUDF';
+create temporary function bloom_contains as 
'hivemall.sketch.bloom.BloomContainsUDF';
+create temporary function bloom_not as 'hivemall.sketch.bloom.BloomNotUDF';
+create temporary function bloom_or as 'hivemall.sketch.bloom.BloomOrUDF';
+create temporary function bloom_contains_any as 
'hivemall.sketch.bloom.BloomContainsAnyUDF';
+create temporary function conditional_emit as 
'hivemall.tools.array.ConditionalEmitUDTF';
 
 -- NLP features
 create temporary function tokenize_ja as 'hivemall.nlp.tokenizer.KuromojiUDF';

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/spark/spark-2.2/pom.xml
----------------------------------------------------------------------
diff --git a/spark/spark-2.2/pom.xml b/spark/spark-2.2/pom.xml
index 100de59..5cb3609 100644
--- a/spark/spark-2.2/pom.xml
+++ b/spark/spark-2.2/pom.xml
@@ -141,7 +141,7 @@
                                                
<JAVA_HOME>${env.JAVA8_HOME}</JAVA_HOME>
                                                
<PATH>${env.JAVA8_HOME}/bin:${env.PATH}</PATH>
                                        </environmentVariables>
-                               </configuration>
+                               </configuration>                                
                        </plugin>
                </plugins>
        </build>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/tools/hivemall-docs/pom.xml
----------------------------------------------------------------------
diff --git a/tools/hivemall-docs/pom.xml b/tools/hivemall-docs/pom.xml
index 76994d7..99cd138 100644
--- a/tools/hivemall-docs/pom.xml
+++ b/tools/hivemall-docs/pom.xml
@@ -16,7 +16,9 @@
   specific language governing permissions and limitations
   under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
 
        <parent>
@@ -56,46 +58,94 @@
                        <scope>provided</scope>
                </dependency>
 
-               <!-- hivemall dependencies -->
+               <!-- compile scope -->
+               <dependency>
+                       <groupId>org.apache.hive</groupId>
+                       <artifactId>hive-exec</artifactId>
+                       <scope>compile</scope>
+               </dependency>
+               <dependency>
+                       <groupId>org.reflections</groupId>
+                       <artifactId>reflections</artifactId>
+                       <version>0.9.10</version>
+                       <scope>compile</scope>
+               </dependency>
+
+               <!-- runtime hivemall dependencies using reflection -->
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-core</artifactId>
                        <version>${project.version}</version>
-                       <scope>compile</scope>
+                       <scope>runtime</scope>
                </dependency>
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-nlp</artifactId>
                        <version>${project.version}</version>
-                       <scope>compile</scope>
+                       <scope>runtime</scope>
                </dependency>
                <dependency>
                        <groupId>org.apache.hivemall</groupId>
                        <artifactId>hivemall-xgboost</artifactId>
                        <version>${project.version}</version>
-                       <scope>compile</scope>
+                       <scope>runtime</scope>
                </dependency>
 
-               <!-- compile scope -->
-               <dependency>
-                       <groupId>org.apache.hive</groupId>
-                       <artifactId>hive-exec</artifactId>
-                       <scope>compile</scope>
-               </dependency>
-               <dependency>
-                       <groupId>com.google.guava</groupId>
-                       <artifactId>guava</artifactId>
-                       <scope>compile</scope>
-               </dependency>
-               <dependency>
-                       <groupId>org.reflections</groupId>
-                       <artifactId>reflections</artifactId>
-                       <version>0.9.10</version>
-                       <scope>compile</scope>
-               </dependency>
        </dependencies>
 
        <build>
+               <pluginManagement>
+                       <plugins>
+                               <!--This plugin's configuration is used to 
store Eclipse m2e settings
+                                       only. It has no influence on the Maven 
build itself. -->
+                               <plugin>
+                                       <groupId>org.eclipse.m2e</groupId>
+                                       
<artifactId>lifecycle-mapping</artifactId>
+                                       <version>1.0.0</version>
+                                       <configuration>
+                                               <lifecycleMappingMetadata>
+                                                       <pluginExecutions>
+                                                               
<pluginExecution>
+                                                                       
<pluginExecutionFilter>
+                                                                               
<groupId>org.codehaus.plexus</groupId>
+                                                                               
<artifactId>plexus-component-metadata</artifactId>
+                                                                               
<versionRange>[1.5.5,)</versionRange>
+                                                                               
<goals>
+                                                                               
        <goal>generate-metadata</goal>
+                                                                               
</goals>
+                                                                       
</pluginExecutionFilter>
+                                                                       <action>
+                                                                               
<execute>
+                                                                               
        <runOnIncremental>false</runOnIncremental>
+                                                                               
</execute>
+                                                                       
</action>
+                                                               
</pluginExecution>
+                                                               
<pluginExecution>
+                                                                       
<pluginExecutionFilter>
+                                                                               
<groupId>
+                                                                               
        org.apache.maven.plugins
+                                                                               
</groupId>
+                                                                               
<artifactId>
+                                                                               
        maven-plugin-plugin
+                                                                               
</artifactId>
+                                                                               
<versionRange>
+                                                                               
        [3.2,)
+                                                                               
</versionRange>
+                                                                               
<goals>
+                                                                               
        <goal>descriptor</goal>
+                                                                               
</goals>
+                                                                       
</pluginExecutionFilter>
+                                                                       <action>
+                                                                               
<ignore></ignore>
+                                                                       
</action>
+                                                               
</pluginExecution>
+                                                       </pluginExecutions>
+                                               </lifecycleMappingMetadata>
+                                       </configuration>
+                               </plugin>
+                       </plugins>
+               </pluginManagement>
+
                <plugins>
                        <plugin>
                                <groupId>org.apache.maven.plugins</groupId>
@@ -106,6 +156,12 @@
                                                <phase>process-classes</phase>
                                        </execution>
                                        <execution>
+                                               <id>mojo-descriptor</id>
+                                               <goals>
+                                                       <goal>descriptor</goal>
+                                               </goals>
+                                       </execution>
+                                       <execution>
                                                <id>generate-helpmojo</id>
                                                <goals>
                                                        <goal>helpmojo</goal>
@@ -113,6 +169,19 @@
                                        </execution>
                                </executions>
                        </plugin>
+                       <plugin>
+                               <groupId>org.codehaus.plexus</groupId>
+                               
<artifactId>plexus-component-metadata</artifactId>
+                               <version>1.7.1</version>
+                               <executions>
+                                       <execution>
+                                               <goals>
+                                                       
<goal>generate-metadata</goal>
+                                               </goals>
+                                       </execution>
+                               </executions>
+                       </plugin>
                </plugins>
        </build>
+
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/49496032/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGenerator.java
----------------------------------------------------------------------
diff --git 
a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGenerator.java 
b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGenerator.java
deleted file mode 100644
index 61fea68..0000000
--- a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGenerator.java
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package hivemall.docs;
-
-import static hivemall.docs.utils.MarkdownUtils.asCodeBlock;
-import static hivemall.docs.utils.MarkdownUtils.asInlineCode;
-import static hivemall.docs.utils.MarkdownUtils.asListElement;
-import static hivemall.docs.utils.MarkdownUtils.indent;
-import static org.apache.commons.lang.StringEscapeUtils.escapeHtml;
-
-import hivemall.utils.lang.StringUtils;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nonnull;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.maven.execution.MavenSession;
-import org.apache.maven.plugin.AbstractMojo;
-import org.apache.maven.plugin.MojoExecutionException;
-import org.apache.maven.plugins.annotations.Mojo;
-import org.apache.maven.plugins.annotations.Parameter;
-import org.reflections.Reflections;
-
-/**
- * Generate a list of UDFs for documentation.
- *
- * @link 
https://hivemall.incubator.apache.org/userguide/misc/generic_funcs.html
- * @link https://hivemall.incubator.apache.org/userguide/misc/funcs.html
- */
-@Mojo(name = "generate-funcs-list")
-public class FuncsListGenerator extends AbstractMojo {
-
-    @Parameter(defaultValue = "${basedir}", readonly = true)
-    private File basedir;
-
-    @Parameter(defaultValue = "${session}", readonly = true)
-    private MavenSession session;
-
-    @Parameter(defaultValue = "docs/gitbook/misc/generic_funcs.md")
-    private String pathToGenericFuncs;
-
-    @Parameter(defaultValue = "docs/gitbook/misc/funcs.md")
-    private String pathToFuncs;
-
-    private static final Map<String, List<String>> genericFuncsHeaders = new 
LinkedHashMap<>();
-    static {
-        genericFuncsHeaders.put("# Array",
-            Arrays.asList("hivemall.tools.array", "hivemall.tools.list"));
-        genericFuncsHeaders.put("# Map", 
Collections.singletonList("hivemall.tools.map"));
-        genericFuncsHeaders.put("# Bitset", 
Collections.singletonList("hivemall.tools.bits"));
-        genericFuncsHeaders.put("# Compression",
-            Collections.singletonList("hivemall.tools.compress"));
-        genericFuncsHeaders.put("# MapReduce", 
Collections.singletonList("hivemall.tools.mapred"));
-        genericFuncsHeaders.put("# Math", 
Collections.singletonList("hivemall.tools.math"));
-        genericFuncsHeaders.put("# Matrix", 
Collections.singletonList("hivemall.tools.matrix"));
-        genericFuncsHeaders.put("# Text processing",
-            Collections.singletonList("hivemall.tools.text"));
-        genericFuncsHeaders.put("# Others", 
Collections.singletonList("hivemall.tools"));
-    }
-
-    private static final Map<String, List<String>> funcsHeaders = new 
LinkedHashMap<>();
-    static {
-        funcsHeaders.put("# Regression", 
Collections.singletonList("hivemall.regression"));
-        funcsHeaders.put("# Classification", Collections.<String>emptyList());
-        funcsHeaders.put("## Binary classification",
-            Collections.singletonList("hivemall.classifier"));
-        funcsHeaders.put("## Multiclass classification",
-            Collections.singletonList("hivemall.classifier.multiclass"));
-        funcsHeaders.put("# Matrix factorization", 
Collections.singletonList("hivemall.mf"));
-        funcsHeaders.put("# Factorization machines", 
Collections.singletonList("hivemall.fm"));
-        funcsHeaders.put("# Recommendation", 
Collections.singletonList("hivemall.recommend"));
-        funcsHeaders.put("# Anomaly detection", 
Collections.singletonList("hivemall.anomaly"));
-        funcsHeaders.put("# Topic modeling", 
Collections.singletonList("hivemall.topicmodel"));
-        funcsHeaders.put("# Preprocessing", 
Collections.singletonList("hivemall.ftvec"));
-        funcsHeaders.put("## Data amplification",
-            Collections.singletonList("hivemall.ftvec.amplify"));
-        funcsHeaders.put("## Feature binning", 
Collections.singletonList("hivemall.ftvec.binning"));
-        funcsHeaders.put("## Feature format conversion",
-            Collections.singletonList("hivemall.ftvec.conv"));
-        funcsHeaders.put("## Feature hashing", 
Collections.singletonList("hivemall.ftvec.hashing"));
-        funcsHeaders.put("## Feature paring", 
Collections.singletonList("hivemall.ftvec.pairing"));
-        funcsHeaders.put("## Ranking", 
Collections.singletonList("hivemall.ftvec.ranking"));
-        funcsHeaders.put("## Feature scaling", 
Collections.singletonList("hivemall.ftvec.scaling"));
-        funcsHeaders.put("## Feature selection",
-            Collections.singletonList("hivemall.ftvec.selection"));
-        funcsHeaders.put("## Feature transformation and vectorization",
-            Collections.singletonList("hivemall.ftvec.trans"));
-        funcsHeaders.put("# Geospatial functions",
-            Collections.singletonList("hivemall.geospatial"));
-        funcsHeaders.put("# Distance measures", 
Collections.singletonList("hivemall.knn.distance"));
-        funcsHeaders.put("# Locality-sensitive hashing",
-            Collections.singletonList("hivemall.knn.lsh"));
-        funcsHeaders.put("# Similarity measures",
-            Collections.singletonList("hivemall.knn.similarity"));
-        funcsHeaders.put("# Evaluation", 
Collections.singletonList("hivemall.evaluation"));
-        funcsHeaders.put("# Sketching", 
Collections.singletonList("hivemall.sketch.hll"));
-        funcsHeaders.put("# Ensemble learning", 
Collections.singletonList("hivemall.ensemble"));
-        funcsHeaders.put("## Bagging", 
Collections.singletonList("hivemall.ensemble.bagging"));
-        funcsHeaders.put("# Decision trees and RandomForest", Arrays.asList(
-            "hivemall.smile.classification", "hivemall.smile.regression", 
"hivemall.smile.tools"));
-        funcsHeaders.put("# XGBoost", 
Arrays.asList("hivemall.xgboost.classification",
-            "hivemall.xgboost.regression", "hivemall.xgboost.tools"));
-        funcsHeaders.put("# Others",
-            Arrays.asList("hivemall", "hivemall.dataset", 
"hivemall.ftvec.text"));
-    }
-
-    @Override
-    public void execute() throws MojoExecutionException {
-        if (!isReactorRootProject()) {
-            // output only once across the projects
-            return;
-        }
-
-        generate(new File(basedir, pathToGenericFuncs),
-            "This page describes a list of useful Hivemall generic functions. 
See also a [list of machine-learning-related functions](./funcs.md).",
-            genericFuncsHeaders);
-        generate(new File(basedir, pathToFuncs),
-            "This page describes a list of Hivemall functions. See also a 
[list of generic Hivemall functions](./generic_funcs.md) for more 
general-purpose functions such as array and map UDFs.",
-            funcsHeaders);
-    }
-
-    private boolean isReactorRootProject() {
-        return 
session.getExecutionRootDirectory().equalsIgnoreCase(basedir.toString());
-    }
-
-    private void generate(@Nonnull File outputFile, @Nonnull String preface,
-            @Nonnull Map<String, List<String>> headers) throws 
MojoExecutionException {
-        Reflections reflections = new Reflections("hivemall");
-        Set<Class<?>> annotatedClasses = 
reflections.getTypesAnnotatedWith(Description.class);
-
-        StringBuilder sb = new StringBuilder();
-        Map<String, Set<String>> packages = new HashMap<>();
-
-        Pattern func = Pattern.compile("_FUNC_(\\(.*?\\))(.*)", 
Pattern.DOTALL);
-
-        for (Class<?> annotatedClass : annotatedClasses) {
-            Deprecated deprecated = 
annotatedClass.getAnnotation(Deprecated.class);
-            if (deprecated != null) {
-                continue;
-            }
-
-            Description description = 
annotatedClass.getAnnotation(Description.class);
-
-            String value = description.value().replaceAll("\n", " ");
-            Matcher matcher = func.matcher(value);
-            if (matcher.find()) {
-                value = asInlineCode(description.name() + matcher.group(1))
-                        + escapeHtml(matcher.group(2));
-            }
-            sb.append(asListElement(value));
-
-            StringBuilder sbExtended = new StringBuilder();
-            if (!description.extended().isEmpty()) {
-                sbExtended.append(description.extended());
-                sb.append("\n");
-            }
-
-            String extended = sbExtended.toString();
-            if (extended.isEmpty()) {
-                sb.append("\n");
-            } else {
-                if (extended.toLowerCase().contains("select")) { // extended 
description contains SQL statements
-                    sb.append(indent(asCodeBlock(extended, "sql")));
-                } else {
-                    sb.append(indent(asCodeBlock(extended)));
-                }
-            }
-
-            String packageName = annotatedClass.getPackage().getName();
-            if (!packages.containsKey(packageName)) {
-                Set<String> set = new TreeSet<>();
-                packages.put(packageName, set);
-            }
-            Set<String> List = packages.get(packageName);
-            List.add(sb.toString());
-
-            StringUtils.clear(sb);
-        }
-
-        try (PrintWriter writer = new PrintWriter(outputFile)) {
-            // license header
-            writer.println("<!--");
-            try {
-                File licenseFile = new File(basedir, 
"resources/license-header.txt");
-                FileReader fileReader = new FileReader(licenseFile);
-
-                try (BufferedReader bufferedReader = new 
BufferedReader(fileReader)) {
-                    String line;
-                    while ((line = bufferedReader.readLine()) != null) {
-                        writer.println(indent(line));
-                    }
-                }
-            } catch (IOException e) {
-                throw new MojoExecutionException("Failed to read license 
file");
-            }
-            writer.println("-->\n");
-
-            writer.println(preface);
-
-            writer.println("\n<!-- toc -->\n");
-
-            for (Map.Entry<String, List<String>> e : headers.entrySet()) {
-                writer.println(e.getKey() + "\n");
-                List<String> packageNames = e.getValue();
-                for (String packageName : packageNames) {
-                    for (String desc : packages.get(packageName)) {
-                        writer.println(desc);
-                    }
-                }
-            }
-
-            writer.flush();
-        } catch (FileNotFoundException e) {
-            throw new MojoExecutionException("Output file is not found");
-        }
-    }
-}

[2/4] incubator-hivemall git commit: [HIVEMALL-145] Merge Brickhouse functions

Reply via email to