http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/d9012d92/userguide/binaryclass/criteo_ffm.html ---------------------------------------------------------------------- diff --git a/userguide/binaryclass/criteo_ffm.html b/userguide/binaryclass/criteo_ffm.html index 465f74d..b83faf0 100644 --- a/userguide/binaryclass/criteo_ffm.html +++ b/userguide/binaryclass/criteo_ffm.html @@ -972,7 +972,7 @@ <b>6.2.1.</b> - Data preparation + Data Preparation </a> @@ -980,13 +980,28 @@ </li> - <li class="chapter " data-level="6.2.2" data-path="a9a_lr.html"> + <li class="chapter " data-level="6.2.2" data-path="a9a_generic.html"> - <a href="a9a_lr.html"> + <a href="a9a_generic.html"> <b>6.2.2.</b> + General Binary Classifier + + </a> + + + + </li> + + <li class="chapter " data-level="6.2.3" data-path="a9a_lr.html"> + + <a href="a9a_lr.html"> + + + <b>6.2.3.</b> + Logistic Regression </a> @@ -995,14 +1010,14 @@ </li> - <li class="chapter " data-level="6.2.3" data-path="a9a_minibatch.html"> + <li class="chapter " data-level="6.2.4" data-path="a9a_minibatch.html"> <a href="a9a_minibatch.html"> - <b>6.2.3.</b> + <b>6.2.4.</b> - Mini-batch gradient descent + Mini-batch Gradient Descent </a> @@ -1038,7 +1053,7 @@ <b>6.3.1.</b> - Data preparation + Data Preparation </a> @@ -1076,13 +1091,28 @@ </li> - <li class="chapter " data-level="6.3.4" data-path="news20_adagrad.html"> + <li class="chapter " data-level="6.3.4" data-path="news20_generic.html"> - <a href="news20_adagrad.html"> + <a href="news20_generic.html"> <b>6.3.4.</b> + General Binary Classifier + + </a> + + + + </li> + + <li class="chapter " data-level="6.3.5" data-path="news20_adagrad.html"> + + <a href="news20_adagrad.html"> + + + <b>6.3.5.</b> + AdaGradRDA, AdaGrad, AdaDelta </a> @@ -1091,12 +1121,12 @@ </li> - <li class="chapter " data-level="6.3.5" data-path="news20_rf.html"> + <li class="chapter " data-level="6.3.6" data-path="news20_rf.html"> <a href="news20_rf.html"> - <b>6.3.5.</b> + <b>6.3.6.</b> Random Forest @@ -1134,7 +1164,7 @@ <b>6.4.1.</b> - Data preparation + Data Preparation </a> @@ -1185,7 +1215,7 @@ <b>6.5.1.</b> - Data preparation + Data Preparation </a> @@ -1236,7 +1266,7 @@ <b>6.6.1.</b> - Data pareparation + Data Pareparation </a> @@ -1302,7 +1332,7 @@ <b>6.8.1.</b> - Data preparation + Data Preparation </a> @@ -1360,7 +1390,7 @@ <b>7.1.1.</b> - Data preparation + Data Preparation </a> @@ -1375,7 +1405,7 @@ <b>7.1.2.</b> - Data preparation for one-vs-the-rest classifiers + Data Preparation for one-vs-the-rest classifiers </a> @@ -1435,7 +1465,7 @@ <b>7.1.6.</b> - one-vs-the-rest classifier + one-vs-the-rest Classifier </a> @@ -1559,7 +1589,7 @@ <b>8.2.1.</b> - Data preparation + Data Preparation </a> @@ -1567,13 +1597,28 @@ </li> - <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_arow.html"> + <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_generic.html"> - <a href="../regression/e2006_arow.html"> + <a href="../regression/e2006_generic.html"> <b>8.2.2.</b> + General Regessor + + </a> + + + + </li> + + <li class="chapter " data-level="8.2.3" data-path="../regression/e2006_arow.html"> + + <a href="../regression/e2006_arow.html"> + + + <b>8.2.3.</b> + Passive Aggressive, AROW </a> @@ -1610,7 +1655,7 @@ <b>8.3.1.</b> - Data preparation + Data Preparation </a> @@ -1698,7 +1743,7 @@ <b>9.1.1.</b> - Item-based collaborative filtering + Item-based Collaborative Filtering </a> @@ -1734,7 +1779,7 @@ <b>9.2.1.</b> - Data preparation + Data Preparation </a> @@ -1749,7 +1794,7 @@ <b>9.2.2.</b> - LSH/MinHash and Jaccard similarity + LSH/MinHash and Jaccard Similarity </a> @@ -1764,7 +1809,7 @@ <b>9.2.3.</b> - LSH/MinHash and brute-force search + LSH/MinHash and Brute-force Search </a> @@ -1815,7 +1860,7 @@ <b>9.3.1.</b> - Data preparation + Data Preparation </a> @@ -1830,7 +1875,7 @@ <b>9.3.2.</b> - Item-based collaborative filtering + Item-based Collaborative Filtering </a> @@ -1875,7 +1920,7 @@ <b>9.3.5.</b> - SLIM for fast top-k recommendation + SLIM for fast top-k Recommendation </a> @@ -1890,7 +1935,7 @@ <b>9.3.6.</b> - 10-fold cross validation (Matrix Factorization) + 10-fold Cross Validation (Matrix Factorization) </a> @@ -2080,7 +2125,7 @@ <b>13.2.1.</b> - a9a tutorial for DataFrame + a9a Tutorial for DataFrame </a> @@ -2095,7 +2140,7 @@ <b>13.2.2.</b> - a9a tutorial for SQL + a9a Tutorial for SQL </a> @@ -2131,7 +2176,7 @@ <b>13.3.1.</b> - E2006-tfidf regression tutorial for DataFrame + E2006-tfidf Regression Tutorial for DataFrame </a> @@ -2146,7 +2191,7 @@ <b>13.3.2.</b> - E2006-tfidf regression tutorial for SQL + E2006-tfidf Regression Tutorial for SQL </a> @@ -2166,7 +2211,7 @@ <b>13.4.</b> - Generic features + Generic Features </a> @@ -2182,7 +2227,7 @@ <b>13.4.1.</b> - Top-k join processing + Top-k Join Processing </a> @@ -2197,7 +2242,7 @@ <b>13.4.2.</b> - Other utility functions + Other Utility Functions </a> @@ -2669,7 +2714,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Field-Aware Factorization Machines","level":"6.8.2","depth":2,"next":{"title":"News20 Multiclass Tutorial","level":"7.1","depth":1,"path":"multiclass/news20.md","ref":"multiclass/news20.md","articles":[{"title":"Data preparation","level":"7.1.1","depth":2,"path":"multiclass/news20_dataset.md","ref":"multiclass/news20_dataset.md","articles":[]},{"title":"Data preparation for one-vs-the-rest classifiers","level":"7.1.2","depth":2,"path":"multiclass/news20_one-vs-the-rest_dataset.md","ref":"multiclass/news20_one-vs-the-rest_dataset.md","articles":[]},{"title":"PA","level":"7.1.3","depth":2,"path":"multiclass/news20_pa.md","ref":"multiclass/news20_pa.md","articles":[]},{"title":"CW, AROW, SCW","level":"7.1.4","depth":2,"path":"multiclass/news20_scw.md","ref":"multiclass/news20_scw.md","articles":[]},{"title":"Ensemble learning","level":"7.1.5","depth":2,"path":"multiclass/news20_ensemble.md","ref":"multiclass/news20_ensemble.md","art icles":[]},{"title":"one-vs-the-rest classifier","level":"7.1.6","depth":2,"path":"multiclass/news20_one-vs-the-rest.md","ref":"multiclass/news20_one-vs-the-rest.md","articles":[]}]},"previous":{"title":"Data preparation","level":"6.8.1","depth":2,"path":"binaryclass/criteo_dataset.md","ref":"binaryclass/criteo_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://g ithub.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5 "},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/criteo_ffm.md","mtime":"2018-10-18T10:26:56.667Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-11-13T09:32:29.643Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Field-Aware Factorization Machines","level":"6.8.2","depth":2,"next":{"title":"News20 Multiclass Tutorial","level":"7.1","depth":1,"path":"multiclass/news20.md","ref":"multiclass/news20.md","articles":[{"title":"Data Preparation","level":"7.1.1","depth":2,"path":"multiclass/news20_dataset.md","ref":"multiclass/news20_dataset.md","articles":[]},{"title":"Data Preparation for one-vs-the-rest classifiers","level":"7.1.2","depth":2,"path":"multiclass/news20_one-vs-the-rest_dataset.md","ref":"multiclass/news20_one-vs-the-rest_dataset.md","articles":[]},{"title":"PA","level":"7.1.3","depth":2,"path":"multiclass/news20_pa.md","ref":"multiclass/news20_pa.md","articles":[]},{"title":"CW, AROW, SCW","level":"7.1.4","depth":2,"path":"multiclass/news20_scw.md","ref":"multiclass/news20_scw.md","articles":[]},{"title":"Ensemble learning","level":"7.1.5","depth":2,"path":"multiclass/news20_ensemble.md","ref":"multiclass/news20_ensemble.md","art icles":[]},{"title":"one-vs-the-rest Classifier","level":"7.1.6","depth":2,"path":"multiclass/news20_one-vs-the-rest.md","ref":"multiclass/news20_one-vs-the-rest.md","articles":[]}]},"previous":{"title":"Data Preparation","level":"6.8.1","depth":2,"path":"binaryclass/criteo_dataset.md","ref":"binaryclass/criteo_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://g ithub.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5 "},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/criteo_ffm.md","mtime":"2018-10-18T10:26:56.667Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-12-26T10:20:07.153Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> @@ -2699,7 +2744,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda - <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/4.1.1/anchor.min.js"></script> + <script src="../gitbook/gitbook-plugin-anchorjs/anchor.min.js"></script>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/d9012d92/userguide/binaryclass/general.html ---------------------------------------------------------------------- diff --git a/userguide/binaryclass/general.html b/userguide/binaryclass/general.html index c940a38..45c5aa8 100644 --- a/userguide/binaryclass/general.html +++ b/userguide/binaryclass/general.html @@ -972,7 +972,7 @@ <b>6.2.1.</b> - Data preparation + Data Preparation </a> @@ -980,13 +980,28 @@ </li> - <li class="chapter " data-level="6.2.2" data-path="a9a_lr.html"> + <li class="chapter " data-level="6.2.2" data-path="a9a_generic.html"> - <a href="a9a_lr.html"> + <a href="a9a_generic.html"> <b>6.2.2.</b> + General Binary Classifier + + </a> + + + + </li> + + <li class="chapter " data-level="6.2.3" data-path="a9a_lr.html"> + + <a href="a9a_lr.html"> + + + <b>6.2.3.</b> + Logistic Regression </a> @@ -995,14 +1010,14 @@ </li> - <li class="chapter " data-level="6.2.3" data-path="a9a_minibatch.html"> + <li class="chapter " data-level="6.2.4" data-path="a9a_minibatch.html"> <a href="a9a_minibatch.html"> - <b>6.2.3.</b> + <b>6.2.4.</b> - Mini-batch gradient descent + Mini-batch Gradient Descent </a> @@ -1038,7 +1053,7 @@ <b>6.3.1.</b> - Data preparation + Data Preparation </a> @@ -1076,13 +1091,28 @@ </li> - <li class="chapter " data-level="6.3.4" data-path="news20_adagrad.html"> + <li class="chapter " data-level="6.3.4" data-path="news20_generic.html"> - <a href="news20_adagrad.html"> + <a href="news20_generic.html"> <b>6.3.4.</b> + General Binary Classifier + + </a> + + + + </li> + + <li class="chapter " data-level="6.3.5" data-path="news20_adagrad.html"> + + <a href="news20_adagrad.html"> + + + <b>6.3.5.</b> + AdaGradRDA, AdaGrad, AdaDelta </a> @@ -1091,12 +1121,12 @@ </li> - <li class="chapter " data-level="6.3.5" data-path="news20_rf.html"> + <li class="chapter " data-level="6.3.6" data-path="news20_rf.html"> <a href="news20_rf.html"> - <b>6.3.5.</b> + <b>6.3.6.</b> Random Forest @@ -1134,7 +1164,7 @@ <b>6.4.1.</b> - Data preparation + Data Preparation </a> @@ -1185,7 +1215,7 @@ <b>6.5.1.</b> - Data preparation + Data Preparation </a> @@ -1236,7 +1266,7 @@ <b>6.6.1.</b> - Data pareparation + Data Pareparation </a> @@ -1302,7 +1332,7 @@ <b>6.8.1.</b> - Data preparation + Data Preparation </a> @@ -1360,7 +1390,7 @@ <b>7.1.1.</b> - Data preparation + Data Preparation </a> @@ -1375,7 +1405,7 @@ <b>7.1.2.</b> - Data preparation for one-vs-the-rest classifiers + Data Preparation for one-vs-the-rest classifiers </a> @@ -1435,7 +1465,7 @@ <b>7.1.6.</b> - one-vs-the-rest classifier + one-vs-the-rest Classifier </a> @@ -1559,7 +1589,7 @@ <b>8.2.1.</b> - Data preparation + Data Preparation </a> @@ -1567,13 +1597,28 @@ </li> - <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_arow.html"> + <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_generic.html"> - <a href="../regression/e2006_arow.html"> + <a href="../regression/e2006_generic.html"> <b>8.2.2.</b> + General Regessor + + </a> + + + + </li> + + <li class="chapter " data-level="8.2.3" data-path="../regression/e2006_arow.html"> + + <a href="../regression/e2006_arow.html"> + + + <b>8.2.3.</b> + Passive Aggressive, AROW </a> @@ -1610,7 +1655,7 @@ <b>8.3.1.</b> - Data preparation + Data Preparation </a> @@ -1698,7 +1743,7 @@ <b>9.1.1.</b> - Item-based collaborative filtering + Item-based Collaborative Filtering </a> @@ -1734,7 +1779,7 @@ <b>9.2.1.</b> - Data preparation + Data Preparation </a> @@ -1749,7 +1794,7 @@ <b>9.2.2.</b> - LSH/MinHash and Jaccard similarity + LSH/MinHash and Jaccard Similarity </a> @@ -1764,7 +1809,7 @@ <b>9.2.3.</b> - LSH/MinHash and brute-force search + LSH/MinHash and Brute-force Search </a> @@ -1815,7 +1860,7 @@ <b>9.3.1.</b> - Data preparation + Data Preparation </a> @@ -1830,7 +1875,7 @@ <b>9.3.2.</b> - Item-based collaborative filtering + Item-based Collaborative Filtering </a> @@ -1875,7 +1920,7 @@ <b>9.3.5.</b> - SLIM for fast top-k recommendation + SLIM for fast top-k Recommendation </a> @@ -1890,7 +1935,7 @@ <b>9.3.6.</b> - 10-fold cross validation (Matrix Factorization) + 10-fold Cross Validation (Matrix Factorization) </a> @@ -2080,7 +2125,7 @@ <b>13.2.1.</b> - a9a tutorial for DataFrame + a9a Tutorial for DataFrame </a> @@ -2095,7 +2140,7 @@ <b>13.2.2.</b> - a9a tutorial for SQL + a9a Tutorial for SQL </a> @@ -2131,7 +2176,7 @@ <b>13.3.1.</b> - E2006-tfidf regression tutorial for DataFrame + E2006-tfidf Regression Tutorial for DataFrame </a> @@ -2146,7 +2191,7 @@ <b>13.3.2.</b> - E2006-tfidf regression tutorial for SQL + E2006-tfidf Regression Tutorial for SQL </a> @@ -2166,7 +2211,7 @@ <b>13.4.</b> - Generic features + Generic Features </a> @@ -2182,7 +2227,7 @@ <b>13.4.1.</b> - Top-k join processing + Top-k Join Processing </a> @@ -2197,7 +2242,7 @@ <b>13.4.2.</b> - Other utility functions + Other Utility Functions </a> @@ -2317,12 +2362,11 @@ specific language governing permissions and limitations under the License. --> -<p>Hivemall has a generic function for classification: <code>train_classifier</code>. Compared to the other functions we will see in the later chapters, <code>train_classifier</code> provides simpler and configureable generic interface which can be utilized to build binary classification models in a variety of settings.</p> +<p>Hivemall has a generic function for classification: <code>train_classifier</code>. Compared to the other functions we will see in the later chapters, <code>train_classifier</code> provides simpler and configurable generic interface which can be utilized to build binary classification models in a variety of settings.</p> <p>Here, we briefly introduce usage of the function. Before trying sample queries, you first need to prepare <a href="https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#a9a" target="_blank">a9a data</a>. See <a href="a9a_dataset.html">our a9a tutorial page</a> for further instructions.</p> <!-- toc --><div id="toc" class="toc"> <ul> -<li><a href="#preparation">Preparation</a></li> <li><a href="#training">Training</a></li> <li><a href="#prediction--evaluation">Prediction & evaluation</a></li> <li><a href="#comparison-with-the-other-binary-classifiers">Comparison with the other binary classifiers</a></li> @@ -2330,15 +2374,6 @@ </div><!-- tocstop --> <div class="panel panel-primary"><div class="panel-heading"><h3 class="panel-title" id="note"><i class="fa fa-edit"></i> Note</h3></div><div class="panel-body"><p>This feature is supported from Hivemall v0.5-rc.1 or later.</p></div></div> -<h1 id="preparation">Preparation</h1> -<ul> -<li>Set <code>total_steps</code> ideally be <code>count(1) / {# of map tasks}</code>:<pre><code> hive> select count(1) from a9a_train; - hive> set hivevar:total_steps=32561; -</code></pre></li> -<li>Set <code>n_samples</code> to compute accuracy of prediction:<pre><code> hive> select count(1) from a9a_test; - hive> set hivevar:n_samples=16281; -</code></pre></li> -</ul> <h1 id="training">Training</h1> <pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> classification_model <span class="hljs-keyword">as</span> <span class="hljs-keyword">select</span> @@ -2347,13 +2382,12 @@ <span class="hljs-keyword">from</span> ( <span class="hljs-keyword">select</span> - train_classifier(add_bias(features), label, <span class="hljs-string">'-loss logloss -opt SGD -reg no -eta simple -total_steps ${total_steps}'</span>) <span class="hljs-keyword">as</span> (feature, weight) + train_classifier(add_bias(features), label, <span class="hljs-string">'-loss logloss -opt SGD -reg no'</span>) <span class="hljs-keyword">as</span> (feature, weight) <span class="hljs-keyword">from</span> a9a_train ) t <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> feature; </code></pre> -<div class="panel panel-primary"><div class="panel-heading"><h3 class="panel-title" id="note"><i class="fa fa-edit"></i> Note</h3></div><div class="panel-body"><p><code>-total_steps</code> option is an optional parameter and training works without it.</p></div></div> <h1 id="prediction--evaluation">Prediction & evaluation</h1> <pre><code class="lang-sql">WITH test_exploded as ( <span class="hljs-keyword">select</span> @@ -2370,23 +2404,39 @@ predict <span class="hljs-keyword">as</span> ( sigmoid(<span class="hljs-keyword">sum</span>(m.weight * t.<span class="hljs-keyword">value</span>)) <span class="hljs-keyword">as</span> prob, (<span class="hljs-keyword">case</span> <span class="hljs-keyword">when</span> sigmoid(<span class="hljs-keyword">sum</span>(m.weight * t.<span class="hljs-keyword">value</span>)) >= <span class="hljs-number">0.5</span> <span class="hljs-keyword">then</span> <span class="hljs-number">1.0</span> <span class="hljs-keyword">else</span> <span class="hljs-number">0.0</span> <span class="hljs-keyword">end</span>)<span class="hljs-keyword">as</span> label <span class="hljs-keyword">from</span> - test_exploded t <span class="hljs-keyword">LEFT</span> <span class="hljs-keyword">OUTER</span> <span class="hljs-keyword">JOIN</span> - classification_model m <span class="hljs-keyword">ON</span> (t.feature = m.feature) + test_exploded t + <span class="hljs-keyword">LEFT</span> <span class="hljs-keyword">OUTER</span> <span class="hljs-keyword">JOIN</span> classification_model m + <span class="hljs-keyword">ON</span> (t.feature = m.feature) <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> t.<span class="hljs-keyword">rowid</span> ), submit <span class="hljs-keyword">as</span> ( <span class="hljs-keyword">select</span> t.label <span class="hljs-keyword">as</span> actual, - pd.label <span class="hljs-keyword">as</span> predicted, - pd.prob <span class="hljs-keyword">as</span> probability + p.label <span class="hljs-keyword">as</span> predicted, + p.prob <span class="hljs-keyword">as</span> probability <span class="hljs-keyword">from</span> - a9a_test t <span class="hljs-keyword">JOIN</span> predict pd - <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = pd.<span class="hljs-keyword">rowid</span>) + a9a_test t + <span class="hljs-keyword">JOIN</span> predict p + <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>) ) -<span class="hljs-keyword">select</span> <span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) / ${n_samples} <span class="hljs-keyword">from</span> submit -<span class="hljs-keyword">where</span> actual = predicted; +<span class="hljs-keyword">select</span> + <span class="hljs-keyword">sum</span>(<span class="hljs-keyword">if</span>(actual = predicted, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>)) / <span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">as</span> accuracy +<span class="hljs-keyword">from</span> + submit; </code></pre> +<table> +<thead> +<tr> +<th style="text-align:center">accuracy</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:center">0.8461396720103188</td> +</tr> +</tbody> +</table> <h1 id="comparison-with-the-other-binary-classifiers">Comparison with the other binary classifiers</h1> <p>In the next part of this user guide, our binary classification tutorials introduce many different functions:</p> <ul> @@ -2405,15 +2455,15 @@ submit <span class="hljs-keyword">as</span> ( </ul> <p>All of them actually have the same interface, but mathematical formulation and its implementation differ from each other.</p> <p>In particular, the above sample queries are almost same as <a href="a9a_lr.html">a9a tutorial using Logistic Regression</a>. The difference is only in a choice of training function: <code>logress()</code> vs. <code>train_classifier()</code>.</p> -<p>However, at the same time, the options <code>-loss logloss -opt SGD -reg no -eta simple -total_steps ${total_steps}</code> for <code>train_classifier</code> indicates that Hivemall uses the generic classifier as Logistic Regressor (<code>logress</code>). Hence, the accuracy of prediction based on either <code>logress</code> and <code>train_classifier</code> should be same under the configuration.</p> +<p>However, at the same time, the options <code>-loss logloss -opt SGD -reg no</code> for <code>train_classifier</code> indicates that Hivemall uses the generic classifier as <code>logress</code>. Hence, the accuracy of prediction based on either <code>logress</code> and <code>train_classifier</code> would be (almost) same under the configuration.</p> <p>In addition, <code>train_classifier</code> supports the <code>-mini_batch</code> option in a similar manner to <a href="a9a_minibatch.html">what <code>logress</code> does</a>. Thus, following two training queries show the same results:</p> <pre><code class="lang-sql"><span class="hljs-keyword">select</span> - logress(add_bias(features), label, <span class="hljs-string">'-total_steps ${total_steps} -mini_batch 10'</span>) <span class="hljs-keyword">as</span> (feature, weight) + logress(add_bias(features), label, <span class="hljs-string">'-mini_batch 10'</span>) <span class="hljs-keyword">as</span> (feature, weight) <span class="hljs-keyword">from</span> a9a_train </code></pre> <pre><code class="lang-sql"><span class="hljs-keyword">select</span> - train_classifier(add_bias(features), label, <span class="hljs-string">'-loss logloss -opt SGD -reg no -eta simple -total_steps ${total_steps} -mini_batch 10'</span>) <span class="hljs-keyword">as</span> (feature, weight) + train_classifier(add_bias(features), label, <span class="hljs-string">'-loss logloss -opt SGD -reg no -mini_batch 10'</span>) <span class="hljs-keyword">as</span> (feature, weight) <span class="hljs-keyword">from</span> a9a_train </code></pre> @@ -2473,7 +2523,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Binary Classification","level":"6.1","depth":1,"next":{"title":"a9a Tutorial","level":"6.2","depth":1,"path":"binaryclass/a9a.md","ref":"binaryclass/a9a.md","articles":[{"title":"Data preparation","level":"6.2.1","depth":2,"path":"binaryclass/a9a_dataset.md","ref":"binaryclass/a9a_dataset.md","articles":[]},{"title":"Logistic Regression","level":"6.2.2","depth":2,"path":"binaryclass/a9a_lr.md","ref":"binaryclass/a9a_lr.md","articles":[]},{"title":"Mini-batch gradient descent","level":"6.2.3","depth":2,"path":"binaryclass/a9a_minibatch.md","ref":"binaryclass/a9a_minibatch.md","articles":[]}]},"previous":{"title":"Step-by-Step Tutorial on Supervised Learning","level":"5.2","depth":1,"path":"supervised_learning/tutorial.md","ref":"supervised_learning/tutorial.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename"," expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"t witter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}}," gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/general.md","mtime":"2018-11-02T10:33:52.938Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-11-13T09:32:29.643Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Binary Classification","level":"6.1","depth":1,"next":{"title":"a9a Tutorial","level":"6.2","depth":1,"path":"binaryclass/a9a.md","ref":"binaryclass/a9a.md","articles":[{"title":"Data Preparation","level":"6.2.1","depth":2,"path":"binaryclass/a9a_dataset.md","ref":"binaryclass/a9a_dataset.md","articles":[]},{"title":"General Binary Classifier","level":"6.2.2","depth":2,"path":"binaryclass/a9a_generic.md","ref":"binaryclass/a9a_generic.md","articles":[]},{"title":"Logistic Regression","level":"6.2.3","depth":2,"path":"binaryclass/a9a_lr.md","ref":"binaryclass/a9a_lr.md","articles":[]},{"title":"Mini-batch Gradient Descent","level":"6.2.4","depth":2,"path":"binaryclass/a9a_minibatch.md","ref":"binaryclass/a9a_minibatch.md","articles":[]}]},"previous":{"title":"Step-by-Step Tutorial on Supervised Learning","level":"5.2","depth":1,"path":"supervised_learning/tutorial.md","ref":"supervised_learning/tutorial.md","articles":[]},"dir":"l tr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"s itemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md "},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/general.md","mtime":"2018-12-26T10:16:03.078Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-12-26T10:20:07.153Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> @@ -2503,7 +2553,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda - <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/4.1.1/anchor.min.js"></script> + <script src="../gitbook/gitbook-plugin-anchorjs/anchor.min.js"></script> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/d9012d92/userguide/binaryclass/kdd2010a.html ---------------------------------------------------------------------- diff --git a/userguide/binaryclass/kdd2010a.html b/userguide/binaryclass/kdd2010a.html index e8edf8e..c807a7c 100644 --- a/userguide/binaryclass/kdd2010a.html +++ b/userguide/binaryclass/kdd2010a.html @@ -972,7 +972,7 @@ <b>6.2.1.</b> - Data preparation + Data Preparation </a> @@ -980,13 +980,28 @@ </li> - <li class="chapter " data-level="6.2.2" data-path="a9a_lr.html"> + <li class="chapter " data-level="6.2.2" data-path="a9a_generic.html"> - <a href="a9a_lr.html"> + <a href="a9a_generic.html"> <b>6.2.2.</b> + General Binary Classifier + + </a> + + + + </li> + + <li class="chapter " data-level="6.2.3" data-path="a9a_lr.html"> + + <a href="a9a_lr.html"> + + + <b>6.2.3.</b> + Logistic Regression </a> @@ -995,14 +1010,14 @@ </li> - <li class="chapter " data-level="6.2.3" data-path="a9a_minibatch.html"> + <li class="chapter " data-level="6.2.4" data-path="a9a_minibatch.html"> <a href="a9a_minibatch.html"> - <b>6.2.3.</b> + <b>6.2.4.</b> - Mini-batch gradient descent + Mini-batch Gradient Descent </a> @@ -1038,7 +1053,7 @@ <b>6.3.1.</b> - Data preparation + Data Preparation </a> @@ -1076,13 +1091,28 @@ </li> - <li class="chapter " data-level="6.3.4" data-path="news20_adagrad.html"> + <li class="chapter " data-level="6.3.4" data-path="news20_generic.html"> - <a href="news20_adagrad.html"> + <a href="news20_generic.html"> <b>6.3.4.</b> + General Binary Classifier + + </a> + + + + </li> + + <li class="chapter " data-level="6.3.5" data-path="news20_adagrad.html"> + + <a href="news20_adagrad.html"> + + + <b>6.3.5.</b> + AdaGradRDA, AdaGrad, AdaDelta </a> @@ -1091,12 +1121,12 @@ </li> - <li class="chapter " data-level="6.3.5" data-path="news20_rf.html"> + <li class="chapter " data-level="6.3.6" data-path="news20_rf.html"> <a href="news20_rf.html"> - <b>6.3.5.</b> + <b>6.3.6.</b> Random Forest @@ -1134,7 +1164,7 @@ <b>6.4.1.</b> - Data preparation + Data Preparation </a> @@ -1185,7 +1215,7 @@ <b>6.5.1.</b> - Data preparation + Data Preparation </a> @@ -1236,7 +1266,7 @@ <b>6.6.1.</b> - Data pareparation + Data Pareparation </a> @@ -1302,7 +1332,7 @@ <b>6.8.1.</b> - Data preparation + Data Preparation </a> @@ -1360,7 +1390,7 @@ <b>7.1.1.</b> - Data preparation + Data Preparation </a> @@ -1375,7 +1405,7 @@ <b>7.1.2.</b> - Data preparation for one-vs-the-rest classifiers + Data Preparation for one-vs-the-rest classifiers </a> @@ -1435,7 +1465,7 @@ <b>7.1.6.</b> - one-vs-the-rest classifier + one-vs-the-rest Classifier </a> @@ -1559,7 +1589,7 @@ <b>8.2.1.</b> - Data preparation + Data Preparation </a> @@ -1567,13 +1597,28 @@ </li> - <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_arow.html"> + <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_generic.html"> - <a href="../regression/e2006_arow.html"> + <a href="../regression/e2006_generic.html"> <b>8.2.2.</b> + General Regessor + + </a> + + + + </li> + + <li class="chapter " data-level="8.2.3" data-path="../regression/e2006_arow.html"> + + <a href="../regression/e2006_arow.html"> + + + <b>8.2.3.</b> + Passive Aggressive, AROW </a> @@ -1610,7 +1655,7 @@ <b>8.3.1.</b> - Data preparation + Data Preparation </a> @@ -1698,7 +1743,7 @@ <b>9.1.1.</b> - Item-based collaborative filtering + Item-based Collaborative Filtering </a> @@ -1734,7 +1779,7 @@ <b>9.2.1.</b> - Data preparation + Data Preparation </a> @@ -1749,7 +1794,7 @@ <b>9.2.2.</b> - LSH/MinHash and Jaccard similarity + LSH/MinHash and Jaccard Similarity </a> @@ -1764,7 +1809,7 @@ <b>9.2.3.</b> - LSH/MinHash and brute-force search + LSH/MinHash and Brute-force Search </a> @@ -1815,7 +1860,7 @@ <b>9.3.1.</b> - Data preparation + Data Preparation </a> @@ -1830,7 +1875,7 @@ <b>9.3.2.</b> - Item-based collaborative filtering + Item-based Collaborative Filtering </a> @@ -1875,7 +1920,7 @@ <b>9.3.5.</b> - SLIM for fast top-k recommendation + SLIM for fast top-k Recommendation </a> @@ -1890,7 +1935,7 @@ <b>9.3.6.</b> - 10-fold cross validation (Matrix Factorization) + 10-fold Cross Validation (Matrix Factorization) </a> @@ -2080,7 +2125,7 @@ <b>13.2.1.</b> - a9a tutorial for DataFrame + a9a Tutorial for DataFrame </a> @@ -2095,7 +2140,7 @@ <b>13.2.2.</b> - a9a tutorial for SQL + a9a Tutorial for SQL </a> @@ -2131,7 +2176,7 @@ <b>13.3.1.</b> - E2006-tfidf regression tutorial for DataFrame + E2006-tfidf Regression Tutorial for DataFrame </a> @@ -2146,7 +2191,7 @@ <b>13.3.2.</b> - E2006-tfidf regression tutorial for SQL + E2006-tfidf Regression Tutorial for SQL </a> @@ -2166,7 +2211,7 @@ <b>13.4.</b> - Generic features + Generic Features </a> @@ -2182,7 +2227,7 @@ <b>13.4.1.</b> - Top-k join processing + Top-k Join Processing </a> @@ -2197,7 +2242,7 @@ <b>13.4.2.</b> - Other utility functions + Other Utility Functions </a> @@ -2372,7 +2417,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"KDD2010a Tutorial","level":"6.4","depth":1,"next":{"title":"Data preparation","level":"6.4.1","depth":2,"path":"binaryclass/kdd2010a_dataset.md","ref":"binaryclass/kdd2010a_dataset.md","articles":[]},"previous":{"title":"Random Forest","level":"6.3.5","depth":2,"path":"binaryclass/news20_rf.md","ref":"binaryclass/news20_rf.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"}, "splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true },"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/kdd2010a.md","mtime":"2018-10-18T10:26:56.669Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-11-13T09:32:29.643Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"KDD2010a Tutorial","level":"6.4","depth":1,"next":{"title":"Data Preparation","level":"6.4.1","depth":2,"path":"binaryclass/kdd2010a_dataset.md","ref":"binaryclass/kdd2010a_dataset.md","articles":[]},"previous":{"title":"Random Forest","level":"6.3.6","depth":2,"path":"binaryclass/news20_rf.md","ref":"binaryclass/news20_rf.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"}, "splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true },"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/kdd2010a.md","mtime":"2018-10-18T10:26:56.669Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-12-26T10:20:07.153Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> @@ -2402,7 +2447,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda - <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/4.1.1/anchor.min.js"></script> + <script src="../gitbook/gitbook-plugin-anchorjs/anchor.min.js"></script> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/d9012d92/userguide/binaryclass/kdd2010a_dataset.html ---------------------------------------------------------------------- diff --git a/userguide/binaryclass/kdd2010a_dataset.html b/userguide/binaryclass/kdd2010a_dataset.html index 5d0aece..5ce07ce 100644 --- a/userguide/binaryclass/kdd2010a_dataset.html +++ b/userguide/binaryclass/kdd2010a_dataset.html @@ -4,7 +4,7 @@ <head> <meta charset="UTF-8"> <meta content="text/html; charset=utf-8" http-equiv="Content-Type"> - <title>Data preparation · Hivemall User Manual</title> + <title>Data Preparation · Hivemall User Manual</title> <meta http-equiv="X-UA-Compatible" content="IE=edge" /> <meta name="description" content=""> <meta name="generator" content="GitBook 3.2.3"> @@ -972,7 +972,7 @@ <b>6.2.1.</b> - Data preparation + Data Preparation </a> @@ -980,13 +980,28 @@ </li> - <li class="chapter " data-level="6.2.2" data-path="a9a_lr.html"> + <li class="chapter " data-level="6.2.2" data-path="a9a_generic.html"> - <a href="a9a_lr.html"> + <a href="a9a_generic.html"> <b>6.2.2.</b> + General Binary Classifier + + </a> + + + + </li> + + <li class="chapter " data-level="6.2.3" data-path="a9a_lr.html"> + + <a href="a9a_lr.html"> + + + <b>6.2.3.</b> + Logistic Regression </a> @@ -995,14 +1010,14 @@ </li> - <li class="chapter " data-level="6.2.3" data-path="a9a_minibatch.html"> + <li class="chapter " data-level="6.2.4" data-path="a9a_minibatch.html"> <a href="a9a_minibatch.html"> - <b>6.2.3.</b> + <b>6.2.4.</b> - Mini-batch gradient descent + Mini-batch Gradient Descent </a> @@ -1038,7 +1053,7 @@ <b>6.3.1.</b> - Data preparation + Data Preparation </a> @@ -1076,13 +1091,28 @@ </li> - <li class="chapter " data-level="6.3.4" data-path="news20_adagrad.html"> + <li class="chapter " data-level="6.3.4" data-path="news20_generic.html"> - <a href="news20_adagrad.html"> + <a href="news20_generic.html"> <b>6.3.4.</b> + General Binary Classifier + + </a> + + + + </li> + + <li class="chapter " data-level="6.3.5" data-path="news20_adagrad.html"> + + <a href="news20_adagrad.html"> + + + <b>6.3.5.</b> + AdaGradRDA, AdaGrad, AdaDelta </a> @@ -1091,12 +1121,12 @@ </li> - <li class="chapter " data-level="6.3.5" data-path="news20_rf.html"> + <li class="chapter " data-level="6.3.6" data-path="news20_rf.html"> <a href="news20_rf.html"> - <b>6.3.5.</b> + <b>6.3.6.</b> Random Forest @@ -1134,7 +1164,7 @@ <b>6.4.1.</b> - Data preparation + Data Preparation </a> @@ -1185,7 +1215,7 @@ <b>6.5.1.</b> - Data preparation + Data Preparation </a> @@ -1236,7 +1266,7 @@ <b>6.6.1.</b> - Data pareparation + Data Pareparation </a> @@ -1302,7 +1332,7 @@ <b>6.8.1.</b> - Data preparation + Data Preparation </a> @@ -1360,7 +1390,7 @@ <b>7.1.1.</b> - Data preparation + Data Preparation </a> @@ -1375,7 +1405,7 @@ <b>7.1.2.</b> - Data preparation for one-vs-the-rest classifiers + Data Preparation for one-vs-the-rest classifiers </a> @@ -1435,7 +1465,7 @@ <b>7.1.6.</b> - one-vs-the-rest classifier + one-vs-the-rest Classifier </a> @@ -1559,7 +1589,7 @@ <b>8.2.1.</b> - Data preparation + Data Preparation </a> @@ -1567,13 +1597,28 @@ </li> - <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_arow.html"> + <li class="chapter " data-level="8.2.2" data-path="../regression/e2006_generic.html"> - <a href="../regression/e2006_arow.html"> + <a href="../regression/e2006_generic.html"> <b>8.2.2.</b> + General Regessor + + </a> + + + + </li> + + <li class="chapter " data-level="8.2.3" data-path="../regression/e2006_arow.html"> + + <a href="../regression/e2006_arow.html"> + + + <b>8.2.3.</b> + Passive Aggressive, AROW </a> @@ -1610,7 +1655,7 @@ <b>8.3.1.</b> - Data preparation + Data Preparation </a> @@ -1698,7 +1743,7 @@ <b>9.1.1.</b> - Item-based collaborative filtering + Item-based Collaborative Filtering </a> @@ -1734,7 +1779,7 @@ <b>9.2.1.</b> - Data preparation + Data Preparation </a> @@ -1749,7 +1794,7 @@ <b>9.2.2.</b> - LSH/MinHash and Jaccard similarity + LSH/MinHash and Jaccard Similarity </a> @@ -1764,7 +1809,7 @@ <b>9.2.3.</b> - LSH/MinHash and brute-force search + LSH/MinHash and Brute-force Search </a> @@ -1815,7 +1860,7 @@ <b>9.3.1.</b> - Data preparation + Data Preparation </a> @@ -1830,7 +1875,7 @@ <b>9.3.2.</b> - Item-based collaborative filtering + Item-based Collaborative Filtering </a> @@ -1875,7 +1920,7 @@ <b>9.3.5.</b> - SLIM for fast top-k recommendation + SLIM for fast top-k Recommendation </a> @@ -1890,7 +1935,7 @@ <b>9.3.6.</b> - 10-fold cross validation (Matrix Factorization) + 10-fold Cross Validation (Matrix Factorization) </a> @@ -2080,7 +2125,7 @@ <b>13.2.1.</b> - a9a tutorial for DataFrame + a9a Tutorial for DataFrame </a> @@ -2095,7 +2140,7 @@ <b>13.2.2.</b> - a9a tutorial for SQL + a9a Tutorial for SQL </a> @@ -2131,7 +2176,7 @@ <b>13.3.1.</b> - E2006-tfidf regression tutorial for DataFrame + E2006-tfidf Regression Tutorial for DataFrame </a> @@ -2146,7 +2191,7 @@ <b>13.3.2.</b> - E2006-tfidf regression tutorial for SQL + E2006-tfidf Regression Tutorial for SQL </a> @@ -2166,7 +2211,7 @@ <b>13.4.</b> - Generic features + Generic Features </a> @@ -2182,7 +2227,7 @@ <b>13.4.1.</b> - Top-k join processing + Top-k Join Processing </a> @@ -2197,7 +2242,7 @@ <b>13.4.2.</b> - Other utility functions + Other Utility Functions </a> @@ -2284,7 +2329,7 @@ <!-- Title --> <h1> <i class="fa fa-circle-o-notch fa-spin"></i> - <a href=".." >Data preparation</a> + <a href=".." >Data Preparation</a> </h1> </div> @@ -2439,7 +2484,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Data preparation","level":"6.4.1","depth":2,"next":{"title":"PA, CW, AROW, SCW","level":"6.4.2","depth":2,"path":"binaryclass/kdd2010a_scw.md","ref":"binaryclass/kdd2010a_scw.md","articles":[]},"previous":{"title":"KDD2010a Tutorial","level":"6.4","depth":1,"path":"binaryclass/kdd2010a.md","ref":"binaryclass/kdd2010a.md","articles":[{"title":"Data preparation","level":"6.4.1","depth":2,"path":"binaryclass/kdd2010a_dataset.md","ref":"binaryclass/kdd2010a_dataset.md","articles":[]},{"title":"PA, CW, AROW, SCW","level":"6.4.2","depth":2,"path":"binaryclass/kdd2010a_scw.md","ref":"binaryclass/kdd2010a_scw.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css ","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Ed it","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/kdd2010a_dataset.md","mtime":"2018-11-02T10:33:52.939Z","type" :"markdown"},"gitbook":{"version":"3.2.3","time":"2018-11-13T09:32:29.643Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Data Preparation","level":"6.4.1","depth":2,"next":{"title":"PA, CW, AROW, SCW","level":"6.4.2","depth":2,"path":"binaryclass/kdd2010a_scw.md","ref":"binaryclass/kdd2010a_scw.md","articles":[]},"previous":{"title":"KDD2010a Tutorial","level":"6.4","depth":1,"path":"binaryclass/kdd2010a.md","ref":"binaryclass/kdd2010a.md","articles":[{"title":"Data Preparation","level":"6.4.1","depth":2,"path":"binaryclass/kdd2010a_dataset.md","ref":"binaryclass/kdd2010a_dataset.md","articles":[]},{"title":"PA, CW, AROW, SCW","level":"6.4.2","depth":2,"path":"binaryclass/kdd2010a_scw.md","ref":"binaryclass/kdd2010a_scw.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css ","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"https://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Ed it","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"binaryclass/kdd2010a_dataset.md","mtime":"2018-12-25T07:50:37.531Z","type" :"markdown"},"gitbook":{"version":"3.2.3","time":"2018-12-26T10:20:07.153Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> @@ -2469,7 +2514,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda - <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/4.1.1/anchor.min.js"></script> + <script src="../gitbook/gitbook-plugin-anchorjs/anchor.min.js"></script>
