http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/recommend/news20_knn.html ---------------------------------------------------------------------- diff --git a/userguide/recommend/news20_knn.html b/userguide/recommend/news20_knn.html index cf42d4b..cbca598 100644 --- a/userguide/recommend/news20_knn.html +++ b/userguide/recommend/news20_knn.html @@ -1631,7 +1631,25 @@ <section class="normal markdown-section"> - <h1 id="extract-clusters-and-assign-n-cluster-ids-to-each-article">Extract clusters and assign N cluster IDs to each article</h1> + <!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<h1 id="extract-clusters-and-assign-n-cluster-ids-to-each-article">Extract clusters and assign N cluster IDs to each article</h1> <pre><code>create or replace view news20_cluster as select @@ -1798,7 +1816,11 @@ LATERAL VIEW explode(clusters) t2 AS clusterid; </tr> </tbody> </table> -<p>Refer <a href="https://github.com/myui/hivemall/wiki/Efficient-Top-k-computation-on-Apache-Hive-using-Hivemall-UDTF#top-k-similarity-computation" target="_blank">this page</a> for efficient top-k kNN computation.</p> +<p>Refer <a href="https://github.com/myui/hivemall/wiki/Efficient-Top-k-computation-on-Apache-Hive-using-Hivemall-UDTF#top-k-similarity-computation" target="_blank">this page</a> for efficient top-k kNN computation. +<div id="page-footer"><hr><p><sub><font color="gray"> +Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. +</font></sub></p> +</div></p> </section> @@ -1832,7 +1854,7 @@ LATERAL VIEW explode(clusters) t2 AS clusterid; <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"LSH/Minhash and Brute-Force Search","level":"8.2.3","depth":2,"next":{"title":"kNN search using b-Bits Minhash","level":"8.2.4","depth":2,"path":"recommend/news20_bbit_minhash.md","ref":"recommend/news20_bbit_minhash.md","articles":[]},"previous":{"title":"LSH/Minhash and Jaccard Similarity","level":"8.2.2","depth":2,"path":"recommend/news20_jaccard.md","ref":"recommend/news20_jaccard.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apach e/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callou t) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"recommend/news20_knn.md","mtime":"2016-10-22T18:42:28.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-10-22T19:10:36.585Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"LSH/Minhash and Brute-Force Search","level":"8.2.3","depth":2,"next":{"title":"kNN search using b-Bits Minhash","level":"8.2.4","depth":2,"path":"recommend/news20_bbit_minhash.md","ref":"recommend/news20_bbit_minhash.md","articles":[]},"previous":{"title":"LSH/Minhash and Jaccard Similarity","level":"8.2.2","depth":2,"path":"recommend/news20_jaccard.md","ref":"recommend/news20_jaccard.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https ://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}," showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"recommend/news20_knn.md","mtime":"2016-11-12T07:18:00.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-11-14T10:40:22.987Z"},"basePath":"..","book":{"language":""}}); }); </script> </div>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/regression/e2006.html ---------------------------------------------------------------------- diff --git a/userguide/regression/e2006.html b/userguide/regression/e2006.html index e9fdca9..f6c0b66 100644 --- a/userguide/regression/e2006.html +++ b/userguide/regression/e2006.html @@ -1631,7 +1631,29 @@ <section class="normal markdown-section"> - + <!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<p><div id="page-footer"><hr><p><sub><font color="gray"> +Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. +</font></sub></p> +</div></p> + </section> @@ -1664,7 +1686,7 @@ <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"E2006-tfidf regression Tutorial","level":"7.1","depth":1,"next":{"title":"Data preparation","level":"7.1.1","depth":2,"path":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},"previous":{"title":"RandomForest","level":"6.2.3","depth":2,"path":"multiclass/iris_randomforest.md","ref":"multiclass/iris_randomforest.md","articles":[]},"dir":"neutral"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"s earch":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expan dable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006.md","mtime":"2016-10-22T18:05:53.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-10-22T19:10:36.585Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"E2006-tfidf regression Tutorial","level":"7.1","depth":1,"next":{"title":"Data preparation","level":"7.1.1","depth":2,"path":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},"previous":{"title":"RandomForest","level":"6.2.3","depth":2,"path":"multiclass/iris_randomforest.md","ref":"multiclass/iris_randomforest.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"}," splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1, h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006.md","mtime":"2016-11-12T07:18:00.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-11-14T10:40:22.987Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/regression/e2006_arow.html ---------------------------------------------------------------------- diff --git a/userguide/regression/e2006_arow.html b/userguide/regression/e2006_arow.html index 0e3f2b9..5a0351f 100644 --- a/userguide/regression/e2006_arow.html +++ b/userguide/regression/e2006_arow.html @@ -1631,7 +1631,25 @@ <section class="normal markdown-section"> - <p><a href="http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf" target="_blank">http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf</a></p> + <!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<p><a href="http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf" target="_blank">http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf</a></p> <hr> <h1 id="pa1a">[PA1a]</h1> <h2 id="training">Training</h2> @@ -1868,7 +1886,11 @@ e2006tfidf_arowe_submit; </code></pre> <blockquote> -<p>0.37789148212861856 0.14280197226536404 0.2357339155291536 0.5060283955470721</p> +<p>0.37789148212861856 0.14280197226536404 0.2357339155291536 0.5060283955470721 +<div id="page-footer"><hr><p><sub><font color="gray"> +Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. +</font></sub></p> +</div></p> </blockquote> @@ -1903,7 +1925,7 @@ <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Passive Aggressive, AROW","level":"7.1.2","depth":2,"next":{"title":"KDDCup 2012 track 2 CTR prediction Tutorial","level":"7.2","depth":1,"path":"regression/kddcup12tr2.md","ref":"regression/kddcup12tr2.md","articles":[{"title":"Data preparation","level":"7.2.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},{"title":"AdaGrad, AdaDelta","level":"7.2.4","depth":2,"path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]}]},"previous":{"title":"Data preparation","level":"7.1.1","depth":2,"pa th":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitema p":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{}, "title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_arow.md","mtime":"2016-10-22T18:07:08.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-10-22T19:10:36.585Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Passive Aggressive, AROW","level":"7.1.2","depth":2,"next":{"title":"KDDCup 2012 track 2 CTR prediction Tutorial","level":"7.2","depth":1,"path":"regression/kddcup12tr2.md","ref":"regression/kddcup12tr2.md","articles":[{"title":"Data preparation","level":"7.2.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},{"title":"AdaGrad, AdaDelta","level":"7.2.4","depth":2,"path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]}]},"previous":{"title":"Data preparation","level":"7.1.1","depth":2,"pa th":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size": 2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md"," glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_arow.md","mtime":"2016-11-12T07:18:00.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-11-14T10:40:22.987Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/regression/e2006_dataset.html ---------------------------------------------------------------------- diff --git a/userguide/regression/e2006_dataset.html b/userguide/regression/e2006_dataset.html index 9ca2840..bbc3ccd 100644 --- a/userguide/regression/e2006_dataset.html +++ b/userguide/regression/e2006_dataset.html @@ -1631,7 +1631,25 @@ <section class="normal markdown-section"> - <p><a href="http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf" target="_blank">http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf</a></p> + <!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<p><a href="http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf" target="_blank">http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression.html#E2006-tfidf</a></p> <h1 id="prerequisite">Prerequisite</h1> <ul> <li><a href="https://github.com/myui/hivemall/tree/master/target/hivemall.jar" target="_blank">hivemall.jar</a></li> @@ -1695,6 +1713,10 @@ source /home/myui/tmp/define-all.hive; CLUSTER <span class="hljs-keyword">BY</span> <span class="hljs-keyword">rand</span>(${<span class="hljs-keyword">seed</span>}); <span class="hljs-comment">-- set mapred.reduce.tasks=-1;</span> </code></pre> +<p><div id="page-footer"><hr><p><sub><font color="gray"> +Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. +</font></sub></p> +</div></p> </section> @@ -1728,7 +1750,7 @@ CLUSTER <span class="hljs-keyword">BY</span> <span class="hljs-keyword">rand</sp <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Data preparation","level":"7.1.1","depth":2,"next":{"title":"Passive Aggressive, AROW","level":"7.1.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]},"previous":{"title":"E2006-tfidf regression Tutorial","level":"7.1","depth":1,"path":"regression/e2006.md","ref":"regression/e2006.md","articles":[{"title":"Data preparation","level":"7.1.1","depth":2,"path":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},{"title":"Passive Aggressive, AROW","level":"7.1.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/ep ub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{ "website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_dataset.md","mtime":"2016-10-22T18:06:19.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-10-22T19:10:36.585Z"},"basePath":"..","book":{"language":""}} ); + gitbook.page.hasChanged({"page":{"title":"Data preparation","level":"7.1.1","depth":2,"next":{"title":"Passive Aggressive, AROW","level":"7.1.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]},"previous":{"title":"E2006-tfidf regression Tutorial","level":"7.1","depth":1,"path":"regression/e2006.md","ref":"regression/e2006.md","articles":[{"title":"Data preparation","level":"7.1.1","depth":2,"path":"regression/e2006_dataset.md","ref":"regression/e2006_dataset.md","articles":[]},{"title":"Passive Aggressive, AROW","level":"7.1.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css ","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apach e/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/e2006_dataset.md","mtime":"2016-11-12T07:18:00.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"201 6-11-14T10:40:22.987Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/regression/kddcup12tr2.html ---------------------------------------------------------------------- diff --git a/userguide/regression/kddcup12tr2.html b/userguide/regression/kddcup12tr2.html index 6be1ece..c06afa7 100644 --- a/userguide/regression/kddcup12tr2.html +++ b/userguide/regression/kddcup12tr2.html @@ -1631,7 +1631,29 @@ <section class="normal markdown-section"> - + <!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<p><div id="page-footer"><hr><p><sub><font color="gray"> +Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. +</font></sub></p> +</div></p> + </section> @@ -1664,7 +1686,7 @@ <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"KDDCup 2012 track 2 CTR prediction Tutorial","level":"7.2","depth":1,"next":{"title":"Data preparation","level":"7.2.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},"previous":{"title":"Passive Aggressive, AROW","level":"7.1.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]},"dir":"neutral"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivema ll/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"togg le-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2.md","mtime":"2016-10-22T18:07:35.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-10-22T19:10:36.585Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"KDDCup 2012 track 2 CTR prediction Tutorial","level":"7.2","depth":1,"next":{"title":"Data preparation","level":"7.2.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},"previous":{"title":"Passive Aggressive, AROW","level":"7.1.2","depth":2,"path":"regression/e2006_arow.md","ref":"regression/e2006_arow.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/i ncubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anch orjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2.md","mtime":"2016-11-12T07:18:00.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-11-14T10:40:22.987Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/regression/kddcup12tr2_adagrad.html ---------------------------------------------------------------------- diff --git a/userguide/regression/kddcup12tr2_adagrad.html b/userguide/regression/kddcup12tr2_adagrad.html index a873493..ff3380e 100644 --- a/userguide/regression/kddcup12tr2_adagrad.html +++ b/userguide/regression/kddcup12tr2_adagrad.html @@ -1631,7 +1631,25 @@ <section class="normal markdown-section"> - <p><em>Note adagrad/adadelta is supported from hivemall v0.3b2 or later (or in the master branch).</em></p> + <!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<p><em>Note adagrad/adadelta is supported from hivemall v0.3b2 or later (or in the master branch).</em></p> <h1 id="preparation">Preparation</h1> <pre><code class="lang-sql">add jar ./tmp/hivemall-with-dependencies.jar; source ./tmp/define-all.hive; @@ -1729,7 +1747,11 @@ pypy scoreKDD.py KDD_Track2_solution.csv adadelta_predict.submit <blockquote> <p>AUC(SGD) : 0.739351</p> <p>AUC(ADAGRAD) : 0.743279</p> -<p>AUC(AdaDelta) : 0.746878</p> +<p>AUC(AdaDelta) : 0.746878 +<div id="page-footer"><hr><p><sub><font color="gray"> +Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. +</font></sub></p> +</div></p> </blockquote> @@ -1764,7 +1786,7 @@ pypy scoreKDD.py KDD_Track2_solution.csv adadelta_predict.submit <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"AdaGrad, AdaDelta","level":"7.2.4","depth":2,"next":{"title":"Collaborative Filtering","level":"8.1","depth":1,"path":"recommend/cf.md","ref":"recommend/cf.md","articles":[{"title":"Item-based Collaborative Filtering","level":"8.1.1","depth":2,"path":"recommend/item_based_cf.md","ref":"recommend/item_based_cf.md","articles":[]}]},"previous":{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emp hasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","eboo k":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2_adagrad.md","mtime":"2016-10-22T18:11:27.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-10-22T19:10:36.585Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"AdaGrad, AdaDelta","level":"7.2.4","depth":2,"next":{"title":"Collaborative Filtering","level":"8.1","depth":1,"path":"recommend/cf.md","ref":"recommend/cf.md","articles":[{"title":"Item-based Collaborative Filtering","level":"8.1.1","depth":2,"path":"recommend/item_based_cf.md","ref":"recommend/item_based_cf.md","articles":[]}]},"previous":{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"p luginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/p df.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2_adagrad.md","mtime":"2016-11-12T07:18:00.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-11-14T10:40:22.987Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/regression/kddcup12tr2_dataset.html ---------------------------------------------------------------------- diff --git a/userguide/regression/kddcup12tr2_dataset.html b/userguide/regression/kddcup12tr2_dataset.html index 89feae3..2b41f43 100644 --- a/userguide/regression/kddcup12tr2_dataset.html +++ b/userguide/regression/kddcup12tr2_dataset.html @@ -1631,7 +1631,25 @@ <section class="normal markdown-section"> - <p>The task is predicting the click through rate (CTR) of advertisement, meaning that we are to predict the probability of each ad being clicked. + <!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<p>The task is predicting the click through rate (CTR) of advertisement, meaning that we are to predict the probability of each ad being clicked. <a href="http://www.kddcup2012.org/c/kddcup2012-track2" target="_blank">http://www.kddcup2012.org/c/kddcup2012-track2</a></p> <hr> <p><strong>Dataset</strong> </p> @@ -1879,7 +1897,11 @@ CLUSTER <span class="hljs-keyword">BY</span> <span class="hljs-keyword">rand</sp testing2 LATERAL <span class="hljs-keyword">VIEW</span> explode(features) t <span class="hljs-keyword">AS</span> feature; </code></pre> -<p><em>Caution: We recommend you to set "mapred.reduce.tasks" in the above example to partition the training_orcfile table into pieces.</em></p> +<p><em>Caution: We recommend you to set "mapred.reduce.tasks" in the above example to partition the training_orcfile table into pieces.</em> +<div id="page-footer"><hr><p><sub><font color="gray"> +Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. +</font></sub></p> +</div></p> </section> @@ -1913,7 +1935,7 @@ CLUSTER <span class="hljs-keyword">BY</span> <span class="hljs-keyword">rand</sp <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Data preparation","level":"7.2.1","depth":2,"next":{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},"previous":{"title":"KDDCup 2012 track 2 CTR prediction Tutorial","level":"7.2","depth":1,"path":"regression/kddcup12tr2.md","ref":"regression/kddcup12tr2.md","articles":[{"title":"Data preparation","level":"7.2.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},{"title":"AdaGrad, AdaDelta","level":"7.2.4","depth":2, "path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-fil ename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md" },"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2_dataset.md","mtime":"2016-10-22T18:08:09.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-10-22T19:10:36.585Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Data preparation","level":"7.2.1","depth":2,"next":{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},"previous":{"title":"KDDCup 2012 track 2 CTR prediction Tutorial","level":"7.2","depth":1,"path":"regression/kddcup12tr2.md","ref":"regression/kddcup12tr2.md","articles":[{"title":"Data preparation","level":"7.2.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},{"title":"AdaGrad, AdaDelta","level":"7.2.4","depth":2, "path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","famil y":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","read me":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2_dataset.md","mtime":"2016-11-12T07:18:00.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-11-14T10:40:22.987Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/regression/kddcup12tr2_lr.html ---------------------------------------------------------------------- diff --git a/userguide/regression/kddcup12tr2_lr.html b/userguide/regression/kddcup12tr2_lr.html index 87dc214..40a54ea 100644 --- a/userguide/regression/kddcup12tr2_lr.html +++ b/userguide/regression/kddcup12tr2_lr.html @@ -1631,7 +1631,25 @@ <section class="normal markdown-section"> - <p>The task is predicting the click through rate (CTR) of advertisement, meaning that we are to predict the probability of each ad being clicked.<br><a href="http://www.kddcup2012.org/c/kddcup2012-track2" target="_blank">http://www.kddcup2012.org/c/kddcup2012-track2</a></p> + <!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<p>The task is predicting the click through rate (CTR) of advertisement, meaning that we are to predict the probability of each ad being clicked.<br><a href="http://www.kddcup2012.org/c/kddcup2012-track2" target="_blank">http://www.kddcup2012.org/c/kddcup2012-track2</a></p> <p><em>Caution: This example just shows a baseline result. Use token tables and amplifier to get better AUC score.</em></p> <hr> <h1 id="logistic-regression">Logistic Regression</h1> @@ -1781,6 +1799,10 @@ pypy scoreKDD.py KDD_Track2_solution.csv pa_predict.submit </tr> </tbody> </table> +<p><div id="page-footer"><hr><p><sub><font color="gray"> +Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. +</font></sub></p> +</div></p> </section> @@ -1814,7 +1836,7 @@ pypy scoreKDD.py KDD_Track2_solution.csv pa_predict.submit <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"next":{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},"previous":{"title":"Data preparation","level":"7.2.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github .com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:n ot(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2_lr.md","mtime":"2016-10-22T18:09:48.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-10-22T19:10:36.585Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"next":{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"path":"regression/kddcup12tr2_lr_amplify.md","ref":"regression/kddcup12tr2_lr_amplify.md","articles":[]},"previous":{"title":"Data preparation","level":"7.2.1","depth":2,"path":"regression/kddcup12tr2_dataset.md","ref":"regression/kddcup12tr2_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"u rl":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/pri nt.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2_lr.md","mtime":"2016-11-12T07:18:00.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-11-14T10:40:22.987Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/regression/kddcup12tr2_lr_amplify.html ---------------------------------------------------------------------- diff --git a/userguide/regression/kddcup12tr2_lr_amplify.html b/userguide/regression/kddcup12tr2_lr_amplify.html index e019652..ef32b5d 100644 --- a/userguide/regression/kddcup12tr2_lr_amplify.html +++ b/userguide/regression/kddcup12tr2_lr_amplify.html @@ -1631,7 +1631,25 @@ <section class="normal markdown-section"> - <p>This article explains <em>amplify</em> technique that is useful for improving prediction score.</p> + <!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<p>This article explains <em>amplify</em> technique that is useful for improving prediction score.</p> <p>Iterations are mandatory in machine learning (e.g., in <a href="http://en.wikipedia.org/wiki/Stochastic_gradient_descent" target="_blank">stochastic gradient descent</a>) to get good prediction models. However, MapReduce is known to be not suited for iterative algorithms because IN/OUT of each MapReduce job is through HDFS.</p> <p>In this example, we show how Hivemall deals with this problem. We use <a href="https://github.com/myui/hivemall/wiki/KDDCup-2012-track-2-CTR-prediction-dataset" target="_blank">KDD Cup 2012, Track 2 Task</a> as an example.</p> <p><strong>WARNING</strong>: rand_amplify() is supported in v0.2-beta1 and later.</p> @@ -1670,14 +1688,13 @@ So, we recommend users to use an amplified view for training as follows:</p> ) t <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> feature; </code></pre> -<p>The above query is executed by 2 MapReduce jobs as shown below: -<img src="https://dl.dropboxusercontent.com/u/13123103/hivemall/amplify.png" alt="amplifier"> -<a href="https://dl.dropboxusercontent.com/u/13123103/hivemall/amplify_plan.txt" target="_blank">Here</a> is the actual plan generated by the Hive.</p> +<p>The above query is executed by 2 MapReduce jobs as shown below:</p> +<p><img src="../resources/images/amplify.png" alt="amplifier"></p> <p>Using <em>trainning_x3</em> instead of the plain training table results in higher and better AUC (0.746214) in <a href="https://github.com/myui/hivemall/wiki/KDDCup-2012-track-2-CTR-prediction-(regression\" target="_blank">this</a>) example.</p> <p>A problem in amplify() is that the shuffle (copy) and merge phase of the stage 1 could become a bottleneck. When the training table is so large that involves 100 Map tasks, the merge operator needs to merge at least 100 files by (external) merge sort! </p> <p>Note that the actual bottleneck is not M/R iterations but shuffling training instance. Iteration without shuffling (as in <a href="http://spark.incubator.apache.org/examples.html" target="_blank">the Spark example</a>) causes very slow convergence and results in requiring more iterations. Shuffling cannot be avoided even in iterative MapReduce variants.</p> -<p><img src="https://dl.dropboxusercontent.com/u/13123103/hivemall/amplify_elapsed.png" alt="amplify elapsed"></p> +<p><img src="../resources/images/amplify_elapsed.png" alt="amplify elapsed"></p> <hr> <h1 id="amplify-and-shuffle-training-examples-in-each-map-task">Amplify and shuffle training examples in each Map task</h1> <p>To deal with large training data, Hivemall provides <strong>rand_amplify</strong> UDTF that randomly shuffles input rows in a Map task. @@ -1692,10 +1709,10 @@ The rand_amplify UDTF outputs rows in a random order when the local buffer speci <span class="hljs-keyword">from</span> training_orcfile; </code></pre> -<p>The training query is executed as follows: -<img src="https://dl.dropboxusercontent.com/u/13123103/hivemall/randamplify.png" alt="Random amplify"><br><a href="https://dl.dropboxusercontent.com/u/13123103/hivemall/randamplify_plan.txt" target="_blank">Here</a> is the actual query plan.</p> +<p>The training query is executed as follows:</p> +<p><img src="../resources/images/randamplify.png" alt="Random amplify"></p> <p>The map-local multiplication and shuffling has no bottleneck in the merge phase and the query is efficiently executed within a single MapReduce job.</p> -<p><img src="https://dl.dropboxusercontent.com/u/13123103/hivemall/randamplify_elapsed.png" alt="rand_amplify elapsed "></p> +<p><img src="../resources/images/randamplify_elapsed.png" alt="rand_amplify elapsed"></p> <p>Using <em>rand_amplify</em> results in a better AUC (0.743392) in <a href="https://github.com/myui/hivemall/wiki/KDDCup-2012-track-2-CTR-prediction-(regression\" target="_blank">this</a>) example.</p> <hr> <h1 id="conclusion">Conclusion</h1> @@ -1726,6 +1743,10 @@ The rand_amplify UDTF outputs rows in a random order when the local buffer speci </tr> </tbody> </table> +<p><div id="page-footer"><hr><p><sub><font color="gray"> +Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. +</font></sub></p> +</div></p> </section> @@ -1759,7 +1780,7 @@ The rand_amplify UDTF outputs rows in a random order when the local buffer speci <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"next":{"title":"AdaGrad, AdaDelta","level":"7.2.4","depth":2,"path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]},"previous":{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/inc ubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2_lr_amplify.md","mtime":"2016-10-22T18:11:01.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-10-22T19:10:36.585Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Logistic Regression with Amplifier","level":"7.2.3","depth":2,"next":{"title":"AdaGrad, AdaDelta","level":"7.2.4","depth":2,"path":"regression/kddcup12tr2_adagrad.md","ref":"regression/kddcup12tr2_adagrad.md","articles":[]},"previous":{"title":"Logistic Regression, Passive Aggressive","level":"7.2.2","depth":2,"path":"regression/kddcup12tr2_lr.md","ref":"regression/kddcup12tr2_lr.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://gi thub.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showL evel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"regression/kddcup12tr2_lr_amplify.md","mtime":"2016-11-14T09:52:36.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2016-11-14T10:40:22.987Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/resources/images/amplify.png ---------------------------------------------------------------------- diff --git a/userguide/resources/images/amplify.png b/userguide/resources/images/amplify.png new file mode 100644 index 0000000..f537e98 Binary files /dev/null and b/userguide/resources/images/amplify.png differ http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/resources/images/amplify_elapsed.png ---------------------------------------------------------------------- diff --git a/userguide/resources/images/amplify_elapsed.png b/userguide/resources/images/amplify_elapsed.png new file mode 100644 index 0000000..595dd60 Binary files /dev/null and b/userguide/resources/images/amplify_elapsed.png differ http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/resources/images/emr-bootstrap.png ---------------------------------------------------------------------- diff --git a/userguide/resources/images/emr-bootstrap.png b/userguide/resources/images/emr-bootstrap.png new file mode 100644 index 0000000..fea2ee2 Binary files /dev/null and b/userguide/resources/images/emr-bootstrap.png differ http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/resources/images/emr-wizard.png ---------------------------------------------------------------------- diff --git a/userguide/resources/images/emr-wizard.png b/userguide/resources/images/emr-wizard.png new file mode 100644 index 0000000..725cc9e Binary files /dev/null and b/userguide/resources/images/emr-wizard.png differ http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/resources/images/randamplify.png ---------------------------------------------------------------------- diff --git a/userguide/resources/images/randamplify.png b/userguide/resources/images/randamplify.png new file mode 100644 index 0000000..432f775 Binary files /dev/null and b/userguide/resources/images/randamplify.png differ http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/70615026/userguide/resources/images/randamplify_elapsed.png ---------------------------------------------------------------------- diff --git a/userguide/resources/images/randamplify_elapsed.png b/userguide/resources/images/randamplify_elapsed.png new file mode 100644 index 0000000..7d5be32 Binary files /dev/null and b/userguide/resources/images/randamplify_elapsed.png differ
