This is an automated email from the ASF dual-hosted git repository. myui pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git
commit 29147ef4566aed483e7e4ca575e95377c131fe31 Author: Makoto Yui <[email protected]> AuthorDate: Fri Feb 8 15:10:54 2019 +0900 Refined tutorial documents --- core/src/main/java/hivemall/tools/map/MapGetUDF.java | 6 ++++-- .../main/java/hivemall/tools/map/MapKeyValuesUDF.java | 4 ++-- core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java | 2 +- core/src/main/java/hivemall/tools/map/UDAFToMap.java | 13 ++++++++++++- core/src/main/java/hivemall/tools/math/IsFiniteUDF.java | 3 ++- core/src/main/java/hivemall/tools/math/L2NormUDAF.java | 10 +++++++++- core/src/main/java/hivemall/tools/math/NanUDF.java | 3 ++- .../main/java/hivemall/tools/math/SigmoidGenericUDF.java | 16 +++++++++++++++- .../main/java/hivemall/docs/FuncsListGeneratorMojo.java | 3 +-- 9 files changed, 48 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/hivemall/tools/map/MapGetUDF.java b/core/src/main/java/hivemall/tools/map/MapGetUDF.java index 3ea1138..27e1c22 100644 --- a/core/src/main/java/hivemall/tools/map/MapGetUDF.java +++ b/core/src/main/java/hivemall/tools/map/MapGetUDF.java @@ -35,8 +35,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; //@formatter:off @Description(name = "map_get", - value = "_FUNC_(MAP<K> a, K n) - Returns the value corresponding to the key in the map", - extended = "WITH tmp as (\n" + + value = "_FUNC_(MAP<K> a, K n) - Returns the value corresponding to the key in the map.", + extended = "Note this is a workaround for a Hive issue that non-constant expression for map indexes not supported.\n" + + "See https://issues.apache.org/jira/browse/HIVE-1955\n\n" + + "WITH tmp as (\n" + " SELECT \"one\" as key\n" + " UNION ALL\n" + " SELECT \"two\" as key\n" + diff --git a/core/src/main/java/hivemall/tools/map/MapKeyValuesUDF.java b/core/src/main/java/hivemall/tools/map/MapKeyValuesUDF.java index 3992f9e..b2c0c75 100644 --- a/core/src/main/java/hivemall/tools/map/MapKeyValuesUDF.java +++ b/core/src/main/java/hivemall/tools/map/MapKeyValuesUDF.java @@ -38,9 +38,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @Description(name = "map_key_values", - value = "_FUNC_(map) - " + "Returns a array of key-value pairs.", + value = "_FUNC_(MAP<K, V> map) - " + "Returns a array of key-value pairs in array<named_struct<key,value>>", extended = "SELECT map_key_values(map(\"one\",1,\"two\",2));\n\n" - + "[{\"key\":\"one\",\"value\":1},{\"key\":\"two\",\"value\":2}]") + + "> [{\"key\":\"one\",\"value\":1},{\"key\":\"two\",\"value\":2}]") @UDFType(deterministic = true, stateful = false) public final class MapKeyValuesUDF extends GenericUDF { diff --git a/core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java b/core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java index e4a2516..fc25326 100644 --- a/core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java +++ b/core/src/main/java/hivemall/tools/map/MergeMapsUDAF.java @@ -41,7 +41,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; //@formatter:off @Description(name = "merge_maps", - value = "_FUNC_(x) - Returns a map which contains the union of an aggregation of maps." + value = "_FUNC_(Map x) - Returns a map which contains the union of an aggregation of maps." + " Note that an existing value of a key can be replaced with the other duplicate key entry.", extended = "SELECT \n" + " merge_maps(m) \n" + diff --git a/core/src/main/java/hivemall/tools/map/UDAFToMap.java b/core/src/main/java/hivemall/tools/map/UDAFToMap.java index b203909..c19852d 100644 --- a/core/src/main/java/hivemall/tools/map/UDAFToMap.java +++ b/core/src/main/java/hivemall/tools/map/UDAFToMap.java @@ -44,8 +44,19 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; * * @see https://cwiki.apache.org/Hive/genericudafcasestudy.html */ +// @formatter:off @Description(name = "to_map", - value = "_FUNC_(key, value) - Convert two aggregated columns into a key-value map") + value = "_FUNC_(key, value) - Convert two aggregated columns into a key-value map", + extended = "WITH input as (\n" + + " select 'aaa' as key, 111 as value\n" + + " UNION all\n" + + " select 'bbb' as key, 222 as value\n" + + ")\n" + + "select to_map(key, value)\n" + + "from input;\n" + + "\n" + + "> {\"bbb\":222,\"aaa\":111}") +// @formatter:on public class UDAFToMap extends AbstractGenericUDAFResolver { @Override diff --git a/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java b/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java index 8c1f83c..4d6550f 100644 --- a/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java +++ b/core/src/main/java/hivemall/tools/math/IsFiniteUDF.java @@ -21,7 +21,8 @@ package hivemall.tools.math; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; -@Description(name = "is_finite", value = "_FUNC_(x) - Determine if x is infinite.") +@Description(name = "is_finite", value = "_FUNC_(x) - Determine if x is finite.", + extended = "SELECT is_finite(333), is_finite(infinity());\n" + "> true false") public final class IsFiniteUDF extends UDF { public Boolean evaluate(Double num) { if (num == null) { diff --git a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java index 921272a..dc65801 100644 --- a/core/src/main/java/hivemall/tools/math/L2NormUDAF.java +++ b/core/src/main/java/hivemall/tools/math/L2NormUDAF.java @@ -24,9 +24,17 @@ import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +// @formatter:off @SuppressWarnings("deprecation") @Description(name = "l2_norm", - value = "_FUNC_(double xi) - Return L2 norm of a vector which has the given values in each dimension") + value = "_FUNC_(double x) - Return a L2 norm of the given input x.", + extended = "WITH input as (\n" + + " select generate_series(1,3) as v\n" + + ")\n" + + "select l2_norm(v) as l2norm\n" + + "from input;\n" + + "> 3.7416573867739413 = sqrt(1^2+2^2+3^2))") +// @formatter:on public final class L2NormUDAF extends UDAF { public static class Evaluator implements UDAFEvaluator { diff --git a/core/src/main/java/hivemall/tools/math/NanUDF.java b/core/src/main/java/hivemall/tools/math/NanUDF.java index 51a6c1a..f00a5ba 100644 --- a/core/src/main/java/hivemall/tools/math/NanUDF.java +++ b/core/src/main/java/hivemall/tools/math/NanUDF.java @@ -21,7 +21,8 @@ package hivemall.tools.math; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; -@Description(name = "nan", value = "_FUNC_() - Returns the constant representing not-a-number.") +@Description(name = "nan", value = "_FUNC_() - Returns the constant representing not-a-number.", + extended = "SELECT nan(), is_nan(nan());\n" + "> NaN true") public final class NanUDF extends UDF { public double evaluate() { return Double.NaN; diff --git a/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java b/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java index 881d94d..d097a18 100644 --- a/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java +++ b/core/src/main/java/hivemall/tools/math/SigmoidGenericUDF.java @@ -37,7 +37,21 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.io.FloatWritable; -@Description(name = "sigmoid", value = "_FUNC_(x) - Returns 1.0 / (1.0 + exp(-x))") +// @formatter:off +@Description(name = "sigmoid", value = "_FUNC_(x) - Returns 1.0 / (1.0 + exp(-x))", + extended = "WITH input as (\n" + + " SELECT 3.0 as x\n" + + " UNION ALL\n" + + " SELECT -3.0 as x\n" + + ")\n" + + "select \n" + + " 1.0 / (1.0 + exp(-x)),\n" + + " sigmoid(x)\n" + + "from\n" + + " input;\n" + + "> 0.04742587317756678 0.04742587357759476\n" + + "> 0.9525741268224334 0.9525741338729858") +// @formatter:on @UDFType(deterministic = true, stateful = false) public final class SigmoidGenericUDF extends GenericUDF { diff --git a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java index 0d58b3f..72645c0 100644 --- a/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java +++ b/tools/hivemall-docs/src/main/java/hivemall/docs/FuncsListGeneratorMojo.java @@ -88,14 +88,13 @@ public class FuncsListGeneratorMojo extends AbstractMojo { genericFuncsHeaders.put("# Map", Collections.singletonList("hivemall.tools.map")); genericFuncsHeaders.put("# MapReduce", Collections.singletonList("hivemall.tools.mapred")); genericFuncsHeaders.put("# Math", Collections.singletonList("hivemall.tools.math")); - genericFuncsHeaders.put("# Matrix", Collections.singletonList("hivemall.tools.matrix")); + genericFuncsHeaders.put("# Vector/Matrix", Arrays.asList("hivemall.tools.matrix", "hivemall.tools.vector")); genericFuncsHeaders.put("# Sanity Checks", Collections.singletonList("hivemall.tools.sanity")); genericFuncsHeaders.put("# Text processing", Collections.singletonList("hivemall.tools.text")); genericFuncsHeaders.put("# Timeseries", Collections.singletonList("hivemall.tools.timeseries")); - genericFuncsHeaders.put("# Vector", Collections.singletonList("hivemall.tools.vector")); genericFuncsHeaders.put("# Others", Collections.singletonList("hivemall.tools")); }
