Repository: incubator-hivemall Updated Branches: refs/heads/master f15379878 -> 8639810d3
Close #109: [HIVEMALL-140] Rename PrecisionUDAF and RecallUDAF Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/8639810d Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/8639810d Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/8639810d Branch: refs/heads/master Commit: 8639810d332c408c8eab0882dbb62297dbc8895d Parents: f153798 Author: Takuya Kitazawa <[email protected]> Authored: Wed Sep 13 21:25:56 2017 +0900 Committer: Makoto Yui <[email protected]> Committed: Wed Sep 13 21:25:56 2017 +0900 ---------------------------------------------------------------------- .../src/main/java/hivemall/evaluation/PrecisionUDAF.java | 2 +- core/src/main/java/hivemall/evaluation/RecallUDAF.java | 2 +- docs/gitbook/eval/rank.md | 11 +++++++---- docs/gitbook/recommend/movielens_cf.md | 7 +++++-- resources/ddl/define-all-as-permanent.hive | 8 ++++---- resources/ddl/define-all.hive | 8 ++++---- resources/ddl/define-all.spark | 8 ++++---- resources/ddl/define-udfs.td.hql | 4 ++-- 8 files changed, 28 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8639810d/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java index d4fad41..93af519 100644 --- a/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java +++ b/core/src/main/java/hivemall/evaluation/PrecisionUDAF.java @@ -47,7 +47,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; @Description( - name = "precision", + name = "precision_at", value = "_FUNC_(array rankItems, array correctItems [, const int recommendSize = rankItems.size])" + " - Returns Precision") public final class PrecisionUDAF extends AbstractGenericUDAFResolver { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8639810d/core/src/main/java/hivemall/evaluation/RecallUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/evaluation/RecallUDAF.java b/core/src/main/java/hivemall/evaluation/RecallUDAF.java index c07b858..fed9f71 100644 --- a/core/src/main/java/hivemall/evaluation/RecallUDAF.java +++ b/core/src/main/java/hivemall/evaluation/RecallUDAF.java @@ -47,7 +47,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; @Description( - name = "recall", + name = "recall_at", value = "_FUNC_(array rankItems, array correctItems [, const int recommendSize = rankItems.size])" + " - Returns Recall") public final class RecallUDAF extends AbstractGenericUDAFResolver { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8639810d/docs/gitbook/eval/rank.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/eval/rank.md b/docs/gitbook/eval/rank.md index db681ac..207418e 100644 --- a/docs/gitbook/eval/rank.md +++ b/docs/gitbook/eval/rank.md @@ -92,12 +92,12 @@ select -- rec = [1,3,2,6], truth = [1,2,4] for each user -- Recall@k - recall(t1.rec, t2.truth, t1.max_k) as recall, - recall(t1.rec, t2.truth, 2) as recall_at_2, + recall_at(t1.rec, t2.truth, t1.max_k) as recall, + recall_at(t1.rec, t2.truth, 2) as recall_at_2, -- Precision@k - precision(t1.rec, t2.truth, t1.max_k) as precision, - precision(t1.rec, t2.truth, 2) as precision_at_2, + precision_at(t1.rec, t2.truth, t1.max_k) as precision, + precision_at(t1.rec, t2.truth, 2) as precision_at_2, -- MAP average_precision(t1.rec, t2.truth, t1.max_k) as average_precision, @@ -132,6 +132,9 @@ We have six different measures, and outputs will be: Here, we introduce the six measures for evaluation of ranked list of items. Importantly, each metric has a different concept behind formulation, and the accuracy measured by the metrics shows different values even for the exactly same input as demonstrated above. Thus, evaluation using multiple ranking measures is more convincing, and it should be easy in Hivemall. +> #### Caution +> Before Hivemall v0.5-rc.1, `recall_at()` and `precision_at()` are respectively registered as `recall()` and `precision()`. However, since `precision` is a reserved keyword from Hive v2.2.0, [we renamed the function names](https://issues.apache.org/jira/browse/HIVEMALL-140). If you are still using `recall()` and/or `precision()`, we strongly recommend you to use the latest version of Hivemall and replace them with the newer function names. + ## Recall-At-k **Recall-at-k (Recall@k)** indicates coverage of truth samples as a result of top-k recommendation. The value is computed by the following equation: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8639810d/docs/gitbook/recommend/movielens_cf.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/movielens_cf.md b/docs/gitbook/recommend/movielens_cf.md index 08268a8..1cf5aee 100644 --- a/docs/gitbook/recommend/movielens_cf.md +++ b/docs/gitbook/recommend/movielens_cf.md @@ -234,8 +234,8 @@ with truth as ( userid ) select - recall(t1.rec_movies, t2.truth, 10) as recall, - precision(t1.rec_movies, t2.truth, 10) as precision, + recall_at(t1.rec_movies, t2.truth, 10) as recall, + precision_at(t1.rec_movies, t2.truth, 10) as precision, average_precision(t1.rec_movies, t2.truth) as average_precision, auc(t1.rec_movies, t2.truth) as auc, mrr(t1.rec_movies, t2.truth) as mrr, @@ -259,3 +259,6 @@ where -- at least 10 recommended items are necessary to compute recall@10 and pr |**NDCG**| 0.15787655209987522 | If you set larger value to the DIMSUM's `-threshold` option, similarity will be more aggressively approximated. Consequently, while efficiency is improved, the accuracy is likely to be decreased. + +> #### Caution +> Before Hivemall v0.5-rc.1, `recall_at()` and `precision_at()` are respectively registered as `recall()` and `precision()`. However, since `precision` is a reserved keyword from Hive v2.2.0, [we renamed the function names](https://issues.apache.org/jira/browse/HIVEMALL-140). If you are still using `recall()` and/or `precision()`, we strongly recommend you to use the latest version of Hivemall and replace them with the newer function names. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8639810d/resources/ddl/define-all-as-permanent.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all-as-permanent.hive b/resources/ddl/define-all-as-permanent.hive index cda33f9..b1c0075 100644 --- a/resources/ddl/define-all-as-permanent.hive +++ b/resources/ddl/define-all-as-permanent.hive @@ -579,11 +579,11 @@ CREATE FUNCTION r2 as 'hivemall.evaluation.R2UDAF' USING JAR '${hivemall_jar}'; DROP FUNCTION IF EXISTS ndcg; CREATE FUNCTION ndcg as 'hivemall.evaluation.NDCGUDAF' USING JAR '${hivemall_jar}'; -DROP FUNCTION IF EXISTS precision; -CREATE FUNCTION precision as 'hivemall.evaluation.PrecisionUDAF' USING JAR '${hivemall_jar}'; +DROP FUNCTION IF EXISTS precision_at; +CREATE FUNCTION precision_at as 'hivemall.evaluation.PrecisionUDAF' USING JAR '${hivemall_jar}'; -DROP FUNCTION IF EXISTS recall; -CREATE FUNCTION recall as 'hivemall.evaluation.RecallUDAF' USING JAR '${hivemall_jar}'; +DROP FUNCTION IF EXISTS recall_at; +CREATE FUNCTION recall_at as 'hivemall.evaluation.RecallUDAF' USING JAR '${hivemall_jar}'; DROP FUNCTION IF EXISTS mrr; CREATE FUNCTION mrr as 'hivemall.evaluation.MRRUDAF' USING JAR '${hivemall_jar}'; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8639810d/resources/ddl/define-all.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive index 6e116ac..e1933b4 100644 --- a/resources/ddl/define-all.hive +++ b/resources/ddl/define-all.hive @@ -571,11 +571,11 @@ create temporary function r2 as 'hivemall.evaluation.R2UDAF'; drop temporary function if exists ndcg; create temporary function ndcg as 'hivemall.evaluation.NDCGUDAF'; -drop temporary function if exists precision; -create temporary function precision as 'hivemall.evaluation.PrecisionUDAF'; +drop temporary function if exists precision_at; +create temporary function precision_at as 'hivemall.evaluation.PrecisionUDAF'; -drop temporary function if exists recall; -create temporary function recall as 'hivemall.evaluation.RecallUDAF'; +drop temporary function if exists recall_at; +create temporary function recall_at as 'hivemall.evaluation.RecallUDAF'; drop temporary function if exists mrr; create temporary function mrr as 'hivemall.evaluation.MRRUDAF'; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8639810d/resources/ddl/define-all.spark ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark index d3eb3cd..db29d85 100644 --- a/resources/ddl/define-all.spark +++ b/resources/ddl/define-all.spark @@ -555,11 +555,11 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION r2 AS 'hivemall.evaluation.R2UDAF'") sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS ndcg") sqlContext.sql("CREATE TEMPORARY FUNCTION ndcg AS 'hivemall.evaluation.NDCGUDAF'") -sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS precision") -sqlContext.sql("CREATE TEMPORARY FUNCTION precision AS 'hivemall.evaluation.PrecisionUDAF'") +sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS precision_at") +sqlContext.sql("CREATE TEMPORARY FUNCTION precision_at AS 'hivemall.evaluation.PrecisionUDAF'") -sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS recall") -sqlContext.sql("CREATE TEMPORARY FUNCTION recall AS 'hivemall.evaluation.RecallUDAF'") +sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS recall_at") +sqlContext.sql("CREATE TEMPORARY FUNCTION recall_at AS 'hivemall.evaluation.RecallUDAF'") sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS mrr") sqlContext.sql("CREATE TEMPORARY FUNCTION mrr AS 'hivemall.evaluation.MRRUDAF'") http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8639810d/resources/ddl/define-udfs.td.hql ---------------------------------------------------------------------- diff --git a/resources/ddl/define-udfs.td.hql b/resources/ddl/define-udfs.td.hql index 2662260..7c9bfc7 100644 --- a/resources/ddl/define-udfs.td.hql +++ b/resources/ddl/define-udfs.td.hql @@ -136,8 +136,8 @@ create temporary function mse as 'hivemall.evaluation.MeanSquaredErrorUDAF'; create temporary function rmse as 'hivemall.evaluation.RootMeanSquaredErrorUDAF'; create temporary function r2 as 'hivemall.evaluation.R2UDAF'; create temporary function ndcg as 'hivemall.evaluation.NDCGUDAF'; -create temporary function precision as 'hivemall.evaluation.PrecisionUDAF'; -create temporary function recall as 'hivemall.evaluation.RecallUDAF'; +create temporary function precision_at as 'hivemall.evaluation.PrecisionUDAF'; +create temporary function recall_at as 'hivemall.evaluation.RecallUDAF'; create temporary function mrr as 'hivemall.evaluation.MRRUDAF'; create temporary function average_precision as 'hivemall.evaluation.MAPUDAF'; create temporary function auc as 'hivemall.evaluation.AUCUDAF';
