Updated license headers of *.md Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/fc97a52e Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/fc97a52e Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/fc97a52e
Branch: refs/heads/master Commit: fc97a52eadd512edfb368c6a6b81455a99a534da Parents: 24f1779 Author: myui <[email protected]> Authored: Mon Nov 7 14:49:10 2016 +0900 Committer: myui <[email protected]> Committed: Mon Nov 7 14:49:10 2016 +0900 ---------------------------------------------------------------------- README.md | 23 ++++++++++++++++++-- docs/gitbook/README.md | 19 ++++++++++++++++ docs/gitbook/SUMMARY.md | 19 ++++++++++++++++ docs/gitbook/anomaly/lof.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/a9a.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/a9a_dataset.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/a9a_lr.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/a9a_minibatch.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/kdd2010a.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/kdd2010a_dataset.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/kdd2010a_scw.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/kdd2010b.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/kdd2010b_arow.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/kdd2010b_dataset.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/news20.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/news20_adagrad.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/news20_dataset.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/news20_pa.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/news20_scw.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/webspam.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/webspam_dataset.md | 19 ++++++++++++++++ docs/gitbook/binaryclass/webspam_scw.md | 19 ++++++++++++++++ docs/gitbook/eval/datagen.md | 19 ++++++++++++++++ docs/gitbook/eval/lr_datagen.md | 19 ++++++++++++++++ docs/gitbook/eval/stat_eval.md | 19 ++++++++++++++++ docs/gitbook/ft_engineering/ft_trans.md | 19 ++++++++++++++++ docs/gitbook/ft_engineering/hashing.md | 19 ++++++++++++++++ docs/gitbook/ft_engineering/quantify.md | 19 ++++++++++++++++ docs/gitbook/ft_engineering/scaling.md | 19 
++++++++++++++++ docs/gitbook/ft_engineering/tfidf.md | 19 ++++++++++++++++ docs/gitbook/ft_engineering/vectorizer.md | 19 ++++++++++++++++ docs/gitbook/getting_started/README.md | 19 ++++++++++++++++ docs/gitbook/getting_started/input-format.md | 19 ++++++++++++++++ docs/gitbook/getting_started/installation.md | 19 ++++++++++++++++ .../getting_started/permanent-functions.md | 19 ++++++++++++++++ docs/gitbook/misc/generic_funcs.md | 19 ++++++++++++++++ docs/gitbook/misc/tokenizer.md | 19 ++++++++++++++++ docs/gitbook/misc/topk.md | 19 ++++++++++++++++ docs/gitbook/multiclass/iris.md | 19 ++++++++++++++++ docs/gitbook/multiclass/iris_dataset.md | 19 ++++++++++++++++ docs/gitbook/multiclass/iris_randomforest.md | 19 ++++++++++++++++ docs/gitbook/multiclass/iris_scw.md | 19 ++++++++++++++++ docs/gitbook/multiclass/news20.md | 19 ++++++++++++++++ docs/gitbook/multiclass/news20_dataset.md | 19 ++++++++++++++++ docs/gitbook/multiclass/news20_ensemble.md | 19 ++++++++++++++++ .../multiclass/news20_one-vs-the-rest.md | 19 ++++++++++++++++ .../news20_one-vs-the-rest_dataset.md | 19 ++++++++++++++++ docs/gitbook/multiclass/news20_pa.md | 19 ++++++++++++++++ docs/gitbook/multiclass/news20_scw.md | 19 ++++++++++++++++ docs/gitbook/recommend/cf.md | 19 ++++++++++++++++ docs/gitbook/recommend/item_based_cf.md | 19 ++++++++++++++++ docs/gitbook/recommend/movielens.md | 19 ++++++++++++++++ docs/gitbook/recommend/movielens_cv.md | 19 ++++++++++++++++ docs/gitbook/recommend/movielens_dataset.md | 19 ++++++++++++++++ docs/gitbook/recommend/movielens_fm.md | 19 ++++++++++++++++ docs/gitbook/recommend/movielens_mf.md | 19 ++++++++++++++++ docs/gitbook/recommend/news20.md | 19 ++++++++++++++++ docs/gitbook/recommend/news20_bbit_minhash.md | 19 ++++++++++++++++ docs/gitbook/recommend/news20_jaccard.md | 19 ++++++++++++++++ docs/gitbook/recommend/news20_knn.md | 19 ++++++++++++++++ docs/gitbook/regression/e2006.md | 19 ++++++++++++++++ docs/gitbook/regression/e2006_arow.md | 19 
++++++++++++++++ docs/gitbook/regression/e2006_dataset.md | 19 ++++++++++++++++ docs/gitbook/regression/kddcup12tr2.md | 19 ++++++++++++++++ docs/gitbook/regression/kddcup12tr2_adagrad.md | 19 ++++++++++++++++ docs/gitbook/regression/kddcup12tr2_dataset.md | 19 ++++++++++++++++ docs/gitbook/regression/kddcup12tr2_lr.md | 19 ++++++++++++++++ .../regression/kddcup12tr2_lr_amplify.md | 19 ++++++++++++++++ docs/gitbook/tips/README.md | 19 ++++++++++++++++ docs/gitbook/tips/addbias.md | 19 ++++++++++++++++ docs/gitbook/tips/emr.md | 19 ++++++++++++++++ docs/gitbook/tips/ensemble_learning.md | 19 ++++++++++++++++ docs/gitbook/tips/general_tips.md | 19 ++++++++++++++++ docs/gitbook/tips/hadoop_tuning.md | 19 ++++++++++++++++ docs/gitbook/tips/mixserver.md | 19 ++++++++++++++++ docs/gitbook/tips/rand_amplify.md | 19 ++++++++++++++++ docs/gitbook/tips/rowid.md | 19 ++++++++++++++++ docs/gitbook/tips/rt_prediction.md | 19 ++++++++++++++++ docs/gitbook/troubleshooting/README.md | 19 ++++++++++++++++ docs/gitbook/troubleshooting/asterisk.md | 19 ++++++++++++++++ .../troubleshooting/mapjoin_classcastex.md | 19 ++++++++++++++++ .../troubleshooting/mapjoin_task_error.md | 19 ++++++++++++++++ docs/gitbook/troubleshooting/num_mappers.md | 19 ++++++++++++++++ docs/gitbook/troubleshooting/oom.md | 19 ++++++++++++++++ pom.xml | 5 +++++ resources/header-definition.xml | 12 ++++++++++ 86 files changed, 1615 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/README.md ---------------------------------------------------------------------- diff --git a/README.md b/README.md index 75ad6cd..79333aa 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,24 @@ -Hivemall: Hive scalable machine learning library -================================================= +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +Apache Hivemall: Hive scalable machine learning library +======================================================= [](https://travis-ci.org/myui/hivemall) [](https://scan.coverity.com/projects/4549) [](https://readthedocs.org/projects/hivemall-docs/?badge=latest) http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/README.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/README.md b/docs/gitbook/README.md index 82602f8..7b61570 100644 --- a/docs/gitbook/README.md +++ b/docs/gitbook/README.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. +--> + # Introduction <div class="alert alert-info"> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/SUMMARY.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/SUMMARY.md b/docs/gitbook/SUMMARY.md index d85f952..7ef1b9b 100644 --- a/docs/gitbook/SUMMARY.md +++ b/docs/gitbook/SUMMARY.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + # Summary ## TABLE OF CONTENTS http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/anomaly/lof.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/anomaly/lof.md b/docs/gitbook/anomaly/lof.md index f8f0b61..48990f8 100644 --- a/docs/gitbook/anomaly/lof.md +++ b/docs/gitbook/anomaly/lof.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + This article introduce how to find outliers using [Local Outlier Detection (LOF)](http://en.wikipedia.org/wiki/Local_outlier_factor) on Hivemall. # Data Preparation http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/a9a.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/a9a.md b/docs/gitbook/binaryclass/a9a.md index e69de29..2959148 100644 --- a/docs/gitbook/binaryclass/a9a.md +++ b/docs/gitbook/binaryclass/a9a.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/a9a_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/a9a_dataset.md b/docs/gitbook/binaryclass/a9a_dataset.md index 28bcd57..76ccb0d 100644 --- a/docs/gitbook/binaryclass/a9a_dataset.md +++ b/docs/gitbook/binaryclass/a9a_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + a9a === http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#a9a http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/a9a_lr.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/a9a_lr.md b/docs/gitbook/binaryclass/a9a_lr.md index 5029c49..17d91c0 100644 --- a/docs/gitbook/binaryclass/a9a_lr.md +++ b/docs/gitbook/binaryclass/a9a_lr.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + a9a === http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#a9a http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/a9a_minibatch.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/a9a_minibatch.md b/docs/gitbook/binaryclass/a9a_minibatch.md index 714db6a..eaa7a06 100644 --- a/docs/gitbook/binaryclass/a9a_minibatch.md +++ b/docs/gitbook/binaryclass/a9a_minibatch.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + This page explains how to apply [Mini-Batch Gradient Descent](https://class.coursera.org/ml-003/lecture/106) for the training of logistic regression explained in [this example](https://github.com/myui/hivemall/wiki/a9a-binary-classification-(logistic-regression)). See [this page](https://github.com/myui/hivemall/wiki/a9a-binary-classification-(logistic-regression)) first. This content depends on it. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/kdd2010a.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/kdd2010a.md b/docs/gitbook/binaryclass/kdd2010a.md index e69de29..2959148 100644 --- a/docs/gitbook/binaryclass/kdd2010a.md +++ b/docs/gitbook/binaryclass/kdd2010a.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/kdd2010a_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/kdd2010a_dataset.md b/docs/gitbook/binaryclass/kdd2010a_dataset.md index 731d68b..ca221c3 100644 --- a/docs/gitbook/binaryclass/kdd2010a_dataset.md +++ b/docs/gitbook/binaryclass/kdd2010a_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + [http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#kdd2010 (algebra)](http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#kdd2010 (algebra)) * # of classes: 2 http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/kdd2010a_scw.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/kdd2010a_scw.md b/docs/gitbook/binaryclass/kdd2010a_scw.md index ee8fbba..5cb19fc 100644 --- a/docs/gitbook/binaryclass/kdd2010a_scw.md +++ b/docs/gitbook/binaryclass/kdd2010a_scw.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + # PA1 ## Train ```sql http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/kdd2010b.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/kdd2010b.md b/docs/gitbook/binaryclass/kdd2010b.md index e69de29..2959148 100644 --- a/docs/gitbook/binaryclass/kdd2010b.md +++ b/docs/gitbook/binaryclass/kdd2010b.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/kdd2010b_arow.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/kdd2010b_arow.md b/docs/gitbook/binaryclass/kdd2010b_arow.md index 7ac845a..2ca0d90 100644 --- a/docs/gitbook/binaryclass/kdd2010b_arow.md +++ b/docs/gitbook/binaryclass/kdd2010b_arow.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + ## training ```sql -- SET mapred.reduce.tasks=32; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/kdd2010b_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/kdd2010b_dataset.md b/docs/gitbook/binaryclass/kdd2010b_dataset.md index 5e26dba..41f0513 100644 --- a/docs/gitbook/binaryclass/kdd2010b_dataset.md +++ b/docs/gitbook/binaryclass/kdd2010b_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + [http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#kdd2010 (bridge to algebra)](http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#kdd2010 (bridge to algebra)) * # of classes: 2 http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/news20.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/news20.md b/docs/gitbook/binaryclass/news20.md index e69de29..2959148 100644 --- a/docs/gitbook/binaryclass/news20.md +++ b/docs/gitbook/binaryclass/news20.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/news20_adagrad.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/news20_adagrad.md b/docs/gitbook/binaryclass/news20_adagrad.md index 08e39df..cbcc0f2 100644 --- a/docs/gitbook/binaryclass/news20_adagrad.md +++ b/docs/gitbook/binaryclass/news20_adagrad.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + _Note that this feature is supported since Hivemall v0.3-beta2 or later._ ## UDF preparation http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/news20_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/news20_dataset.md b/docs/gitbook/binaryclass/news20_dataset.md index 5ff80cd..87208cf 100644 --- a/docs/gitbook/binaryclass/news20_dataset.md +++ b/docs/gitbook/binaryclass/news20_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + Get the news20b dataset. http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#news20.binary http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/news20_pa.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/news20_pa.md b/docs/gitbook/binaryclass/news20_pa.md index 7763a15..df082b9 100644 --- a/docs/gitbook/binaryclass/news20_pa.md +++ b/docs/gitbook/binaryclass/news20_pa.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + ## UDF preparation ``` delete jar /home/myui/tmp/hivemall.jar; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/news20_scw.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/news20_scw.md b/docs/gitbook/binaryclass/news20_scw.md index 0b27dab..fa1da7f 100644 --- a/docs/gitbook/binaryclass/news20_scw.md +++ b/docs/gitbook/binaryclass/news20_scw.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + ## UDF preparation ``` use news20; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/webspam.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/webspam.md b/docs/gitbook/binaryclass/webspam.md index e69de29..2959148 100644 --- a/docs/gitbook/binaryclass/webspam.md +++ b/docs/gitbook/binaryclass/webspam.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/webspam_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/webspam_dataset.md b/docs/gitbook/binaryclass/webspam_dataset.md index 4686865..40cac07 100644 --- a/docs/gitbook/binaryclass/webspam_dataset.md +++ b/docs/gitbook/binaryclass/webspam_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + Get the dataset from http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#webspam http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/binaryclass/webspam_scw.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/binaryclass/webspam_scw.md b/docs/gitbook/binaryclass/webspam_scw.md index 635b32d..cadd0ab 100644 --- a/docs/gitbook/binaryclass/webspam_scw.md +++ b/docs/gitbook/binaryclass/webspam_scw.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + # Preparation ``` http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/eval/datagen.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/eval/datagen.md b/docs/gitbook/eval/datagen.md index e69de29..2959148 100644 --- a/docs/gitbook/eval/datagen.md +++ b/docs/gitbook/eval/datagen.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/eval/lr_datagen.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/eval/lr_datagen.md b/docs/gitbook/eval/lr_datagen.md index 55cb360..8fa5239 100644 --- a/docs/gitbook/eval/lr_datagen.md +++ b/docs/gitbook/eval/lr_datagen.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + _Note this feature is supported on hivemall v0.2-alpha3 or later._ # create a dual table http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/eval/stat_eval.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/eval/stat_eval.md b/docs/gitbook/eval/stat_eval.md index 7f1688b..6b0af8e 100644 --- a/docs/gitbook/eval/stat_eval.md +++ b/docs/gitbook/eval/stat_eval.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + Using the [E2006 tfidf regression example](https://github.com/myui/hivemall/wiki/E2006-tfidf-regression-evaluation-(PA,-AROW)), we explain how to evaluate the prediction model on Hive. 
# Scoring by evaluation metrics http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/ft_engineering/ft_trans.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/ft_engineering/ft_trans.md b/docs/gitbook/ft_engineering/ft_trans.md index e69de29..2959148 100644 --- a/docs/gitbook/ft_engineering/ft_trans.md +++ b/docs/gitbook/ft_engineering/ft_trans.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/ft_engineering/hashing.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/ft_engineering/hashing.md b/docs/gitbook/ft_engineering/hashing.md index 09fa1ff..daf4a23 100644 --- a/docs/gitbook/ft_engineering/hashing.md +++ b/docs/gitbook/ft_engineering/hashing.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + Hivemall supports [Feature Hashing](https://github.com/myui/hivemall/wiki/Feature-hashing) (a.k.a. hashing trick) through `feature_hashing` and `mhash` functions. Find the differences in the following examples. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/ft_engineering/quantify.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/ft_engineering/quantify.md b/docs/gitbook/ft_engineering/quantify.md index 1d6a223..952db53 100644 --- a/docs/gitbook/ft_engineering/quantify.md +++ b/docs/gitbook/ft_engineering/quantify.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + `quantified_features` is useful for transforming values of non-number columns to indexed numbers. *Note: This feature is supported in Hivemall v0.4 or later.* http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/ft_engineering/scaling.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/ft_engineering/scaling.md b/docs/gitbook/ft_engineering/scaling.md index 6e7d312..26d82bd 100644 --- a/docs/gitbook/ft_engineering/scaling.md +++ b/docs/gitbook/ft_engineering/scaling.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + # Min-Max Normalization http://en.wikipedia.org/wiki/Feature_scaling#Rescaling ```sql http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/ft_engineering/tfidf.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/ft_engineering/tfidf.md b/docs/gitbook/ft_engineering/tfidf.md index e881e10..46e4fac 100644 --- a/docs/gitbook/ft_engineering/tfidf.md +++ b/docs/gitbook/ft_engineering/tfidf.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + This document explains how to compute [TF-IDF](http://en.wikipedia.org/wiki/Tf%E2%80%93idf) with Apache Hive/Hivemall. What you need to compute TF-IDF is a table/view composing (docid, word) pair, 2 views, and 1 query. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/ft_engineering/vectorizer.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/ft_engineering/vectorizer.md b/docs/gitbook/ft_engineering/vectorizer.md index bc929a5..59038d1 100644 --- a/docs/gitbook/ft_engineering/vectorizer.md +++ b/docs/gitbook/ft_engineering/vectorizer.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + ## Feature Vectorizer `array<string> vectorize_feature(array<string> featureNames, ...)` is useful to generate a feature vector for each row, from a table. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/getting_started/README.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/getting_started/README.md b/docs/gitbook/getting_started/README.md index 27870e5..98393aa 100644 --- a/docs/gitbook/getting_started/README.md +++ b/docs/gitbook/getting_started/README.md @@ -1 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + # Summary \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/getting_started/input-format.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/getting_started/input-format.md b/docs/gitbook/getting_started/input-format.md index 272d3eb..698c095 100644 --- a/docs/gitbook/getting_started/input-format.md +++ b/docs/gitbook/getting_started/input-format.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + This page explains the input format of training data in Hivemall. Here, we use [EBNF](http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form)-like notation for describing the format. 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/getting_started/installation.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/getting_started/installation.md b/docs/gitbook/getting_started/installation.md index bb1920e..3a3c97f 100644 --- a/docs/gitbook/getting_started/installation.md +++ b/docs/gitbook/getting_started/installation.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + Prerequisites ============ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/getting_started/permanent-functions.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/getting_started/permanent-functions.md b/docs/gitbook/getting_started/permanent-functions.md index aab399b..a4879c3 100644 --- a/docs/gitbook/getting_started/permanent-functions.md +++ b/docs/gitbook/getting_started/permanent-functions.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + Hive v0.13 or later supports [permanent functions](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-Create/DropFunction) that live across sessions. Permanent functions are useful when you are using Hive through Hiveserver or to avoid hivemall installation for each session. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/misc/generic_funcs.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/misc/generic_funcs.md b/docs/gitbook/misc/generic_funcs.md index 1769699..9749dae 100644 --- a/docs/gitbook/misc/generic_funcs.md +++ b/docs/gitbook/misc/generic_funcs.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. +--> + This page describes a list of useful Hivemall generic functions. # Array functions http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/misc/tokenizer.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/misc/tokenizer.md b/docs/gitbook/misc/tokenizer.md index cd2ce08..47f07e0 100644 --- a/docs/gitbook/misc/tokenizer.md +++ b/docs/gitbook/misc/tokenizer.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + # Tokenizer for English Texts Hivemall provides simple English text tokenizer UDF that has following syntax: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/misc/topk.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/misc/topk.md b/docs/gitbook/misc/topk.md index dcd545a..3d072ed 100644 --- a/docs/gitbook/misc/topk.md +++ b/docs/gitbook/misc/topk.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + `each_top_k(int k, ANY group, double value, arg1, arg2, ..., argN)` returns the top-k records for each `group`. It returns a relation consisting of `(int rank, double value, arg1, arg2, .., argN)`. This function is particularly useful for applying a similarity/distance function where the computation complexity is **O(nm)**. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/iris.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/iris.md b/docs/gitbook/multiclass/iris.md index e69de29..2959148 100644 --- a/docs/gitbook/multiclass/iris.md +++ b/docs/gitbook/multiclass/iris.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/iris_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/iris_dataset.md b/docs/gitbook/multiclass/iris_dataset.md index 86f89ad..38a6831 100644 --- a/docs/gitbook/multiclass/iris_dataset.md +++ b/docs/gitbook/multiclass/iris_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + # Dataset preparation Iris Dataset: https://archive.ics.uci.edu/ml/datasets/Iris http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/iris_randomforest.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/iris_randomforest.md b/docs/gitbook/multiclass/iris_randomforest.md index bafa338..fd85471 100644 --- a/docs/gitbook/multiclass/iris_randomforest.md +++ b/docs/gitbook/multiclass/iris_randomforest.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + *NOTE: RandomForest is being supported from Hivemall v0.4 or later.* # Dataset http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/iris_scw.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/iris_scw.md b/docs/gitbook/multiclass/iris_scw.md index bafa338..fd85471 100644 --- a/docs/gitbook/multiclass/iris_scw.md +++ b/docs/gitbook/multiclass/iris_scw.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + *NOTE: RandomForest is being supported from Hivemall v0.4 or later.* # Dataset http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/news20.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/news20.md b/docs/gitbook/multiclass/news20.md index e69de29..2959148 100644 --- a/docs/gitbook/multiclass/news20.md +++ b/docs/gitbook/multiclass/news20.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/news20_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/news20_dataset.md b/docs/gitbook/multiclass/news20_dataset.md index 35ada12..96decec 100644 --- a/docs/gitbook/multiclass/news20_dataset.md +++ b/docs/gitbook/multiclass/news20_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + Get the news20 dataset. http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#news20 http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/news20_ensemble.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/news20_ensemble.md b/docs/gitbook/multiclass/news20_ensemble.md index 9cfd35d..6bf1c93 100644 --- a/docs/gitbook/multiclass/news20_ensemble.md +++ b/docs/gitbook/multiclass/news20_ensemble.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + This example explains how to run ensemble learning in Hivemall. Two heads are better than one? Let's verify it by ensemble learning. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/news20_one-vs-the-rest.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/news20_one-vs-the-rest.md b/docs/gitbook/multiclass/news20_one-vs-the-rest.md index 4c611d0..d98329f 100644 --- a/docs/gitbook/multiclass/news20_one-vs-the-rest.md +++ b/docs/gitbook/multiclass/news20_one-vs-the-rest.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + A one-vs-the-rest classifier uses the binary classifier for each class. ## UDF preparation http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/news20_one-vs-the-rest_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/news20_one-vs-the-rest_dataset.md b/docs/gitbook/multiclass/news20_one-vs-the-rest_dataset.md index 2a69615..f437399 100644 --- a/docs/gitbook/multiclass/news20_one-vs-the-rest_dataset.md +++ b/docs/gitbook/multiclass/news20_one-vs-the-rest_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + *One-vs-the-rest* is a multiclass classification method that uses binary classifiers independently for each class. 
http://en.wikipedia.org/wiki/Multiclass_classification#one_vs_all http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/news20_pa.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/news20_pa.md b/docs/gitbook/multiclass/news20_pa.md index 8e69beb..26083f9 100644 --- a/docs/gitbook/multiclass/news20_pa.md +++ b/docs/gitbook/multiclass/news20_pa.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + Preparation ========= http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/multiclass/news20_scw.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/multiclass/news20_scw.md b/docs/gitbook/multiclass/news20_scw.md index 330c163..f6f21af 100644 --- a/docs/gitbook/multiclass/news20_scw.md +++ b/docs/gitbook/multiclass/news20_scw.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + | Algorithm | Accuracy | |:-----------|------------:| | PA2 | 0.8204357625845229 | http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/cf.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/cf.md b/docs/gitbook/recommend/cf.md index e69de29..2959148 100644 --- a/docs/gitbook/recommend/cf.md +++ b/docs/gitbook/recommend/cf.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/item_based_cf.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/item_based_cf.md b/docs/gitbook/recommend/item_based_cf.md index a4a8cfd..2eb7890 100644 --- a/docs/gitbook/recommend/item_based_cf.md +++ b/docs/gitbook/recommend/item_based_cf.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + This document describes how to do Item-based Collaborative Filtering using Hivemall. _Caution: naive similarity computation is `O(n^2)` to compute all item-item pair similarity. [MinHash](https://en.wikipedia.org/wiki/MinHash#Jaccard_similarity_and_minimum_hash_values) is an efficient scheme for computing Jaccard similarity. 
Section 6 shows how to use MinHash in Hivemall._ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/movielens.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/movielens.md b/docs/gitbook/recommend/movielens.md index e69de29..2959148 100644 --- a/docs/gitbook/recommend/movielens.md +++ b/docs/gitbook/recommend/movielens.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/movielens_cv.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/movielens_cv.md b/docs/gitbook/recommend/movielens_cv.md index ec2255b..a1f7b2f 100644 --- a/docs/gitbook/recommend/movielens_cv.md +++ b/docs/gitbook/recommend/movielens_cv.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + [Cross-validation](http://en.wikipedia.org/wiki/Cross-validation_(statistics)#k-fold_cross-validation) is a model validation technique for assessing how a prediction model will generalize to an independent data set. This example shows a way to perform [k-fold cross validation](http://en.wikipedia.org/wiki/Cross-validation_(statistics)#k-fold_cross-validation) to evaluate prediction performance. *Caution:* Matrix factorization is supported in Hivemall v0.3 or later. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/movielens_dataset.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/movielens_dataset.md b/docs/gitbook/recommend/movielens_dataset.md index f175f4f..27c04ba 100644 --- a/docs/gitbook/recommend/movielens_dataset.md +++ b/docs/gitbook/recommend/movielens_dataset.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + # Data preparation First, download the MovieLens dataset from the following site. http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/movielens_fm.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/movielens_fm.md b/docs/gitbook/recommend/movielens_fm.md index 282e923..eac8013 100644 --- a/docs/gitbook/recommend/movielens_fm.md +++ b/docs/gitbook/recommend/movielens_fm.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> + _Caution: Factorization Machine is supported from Hivemall v0.4 or later._ # Data preparation http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/movielens_mf.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/movielens_mf.md b/docs/gitbook/recommend/movielens_mf.md index bc5c641..f275df8 100644 --- a/docs/gitbook/recommend/movielens_mf.md +++ b/docs/gitbook/recommend/movielens_mf.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + This page explains how to run matrix factorization on [MovieLens 1M dataset](https://github.com/myui/hivemall/wiki/MovieLens-Dataset). *Caution:* Matrix factorization is supported in Hivemall v0.3 or later. 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/news20.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/news20.md b/docs/gitbook/recommend/news20.md index e69de29..2959148 100644 --- a/docs/gitbook/recommend/news20.md +++ b/docs/gitbook/recommend/news20.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/news20_bbit_minhash.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/news20_bbit_minhash.md b/docs/gitbook/recommend/news20_bbit_minhash.md index 72ba7f2..474a40d 100644 --- a/docs/gitbook/recommend/news20_bbit_minhash.md +++ b/docs/gitbook/recommend/news20_bbit_minhash.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + # Function Signature of bbit_minhash ``` http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/news20_jaccard.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/news20_jaccard.md b/docs/gitbook/recommend/news20_jaccard.md index ea6f8cc..6a30fb8 100644 --- a/docs/gitbook/recommend/news20_jaccard.md +++ b/docs/gitbook/recommend/news20_jaccard.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + List related (similar) articles for each article. 
# Preparation http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/recommend/news20_knn.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/recommend/news20_knn.md b/docs/gitbook/recommend/news20_knn.md index 0471a2e..1e0ae97 100644 --- a/docs/gitbook/recommend/news20_knn.md +++ b/docs/gitbook/recommend/news20_knn.md @@ -1,3 +1,22 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + # Extract clusters and assign N cluster IDs to each article ``` create or replace view news20_cluster http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fc97a52e/docs/gitbook/regression/e2006.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/regression/e2006.md b/docs/gitbook/regression/e2006.md index e69de29..2959148 100644 --- a/docs/gitbook/regression/e2006.md +++ b/docs/gitbook/regression/e2006.md @@ -0,0 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +
