http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/eb070238/userguide/multiclass/iris.html ---------------------------------------------------------------------- diff --git a/userguide/multiclass/iris.html b/userguide/multiclass/iris.html index 3966e20..1051de6 100644 --- a/userguide/multiclass/iris.html +++ b/userguide/multiclass/iris.html @@ -980,6 +980,21 @@ </li> + <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_rf.html"> + + <a href="../binaryclass/news20_rf.html"> + + + <b>6.3.5.</b> + + Random Forest + + </a> + + + + </li> + </ul> @@ -1324,7 +1339,7 @@ <b>7.2.3.</b> - RandomForest + Random Forest </a> @@ -2165,7 +2180,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Iris tutorial","level":"7.2","depth":1,"next":{"title":"Data preparation","level":"7.2.1","depth":2,"path":"multiclass/iris_dataset.md","ref":"multiclass/iris_dataset.md","articles":[]},"previous":{"title":"one-vs-the-rest classifier","level":"7.1.6","depth":2,"path":"multiclass/news20_one-vs-the-rest.md","ref":"multiclass/news20_one-vs-the-rest.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache 
/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showL evel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/iris.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-23T09:59:20.878Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Iris tutorial","level":"7.2","depth":1,"next":{"title":"Data 
preparation","level":"7.2.1","depth":2,"path":"multiclass/iris_dataset.md","ref":"multiclass/iris_dataset.md","articles":[]},"previous":{"title":"one-vs-the-rest classifier","level":"7.1.6","depth":2,"path":"multiclass/news20_one-vs-the-rest.md","ref":"multiclass/news20_one-vs-the-rest.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache /incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showL evel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > 
h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/iris.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-30T12:18:33.308Z"},"basePath":"..","book":{"language":""}}); }); </script> </div>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/eb070238/userguide/multiclass/iris_dataset.html ---------------------------------------------------------------------- diff --git a/userguide/multiclass/iris_dataset.html b/userguide/multiclass/iris_dataset.html index 0b3f6d0..647ffa0 100644 --- a/userguide/multiclass/iris_dataset.html +++ b/userguide/multiclass/iris_dataset.html @@ -980,6 +980,21 @@ </li> + <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_rf.html"> + + <a href="../binaryclass/news20_rf.html"> + + + <b>6.3.5.</b> + + Random Forest + + </a> + + + + </li> + </ul> @@ -1324,7 +1339,7 @@ <b>7.2.3.</b> - RandomForest + Random Forest </a> @@ -2195,13 +2210,13 @@ from <span class="hljs-comment">-- 80% for training</span> <span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> train80p <span class="hljs-keyword">as</span> -<span class="hljs-keyword">select</span> * <span class="hljs-keyword">from</span> iris_shuffled +<span class="hljs-keyword">select</span> * <span class="hljs-keyword">from</span> iris_shuffled <span class="hljs-keyword">order</span> <span class="hljs-keyword">by</span> rnd <span class="hljs-keyword">DESC</span> <span class="hljs-keyword">limit</span> <span class="hljs-number">120</span>; <span class="hljs-comment">-- 20% for testing</span> <span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> test20p <span class="hljs-keyword">as</span> -<span class="hljs-keyword">select</span> * <span class="hljs-keyword">from</span> iris_shuffled +<span class="hljs-keyword">select</span> * <span class="hljs-keyword">from</span> iris_shuffled <span class="hljs-keyword">order</span> <span class="hljs-keyword">by</span> rnd <span class="hljs-keyword">ASC</span> <span class="hljs-keyword">limit</span> <span class="hljs-number">30</span>; @@ -2226,60 +2241,6 @@ from <span class="hljs-keyword">from</span> train80p; </code></pre> -<h1 
id="training-multiclass-classification">Training (multiclass classification)</h1> -<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> model_scw1 <span class="hljs-keyword">as</span> -<span class="hljs-keyword">select</span> - label, - feature, - argmin_kld(weight, covar) <span class="hljs-keyword">as</span> weight -<span class="hljs-keyword">from</span> - (<span class="hljs-keyword">select</span> - train_multiclass_scw(features, label) <span class="hljs-keyword">as</span> (label, feature, weight, covar) - <span class="hljs-keyword">from</span> - training_x10 - ) t -<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> label, feature; -</code></pre> -<h1 id="predict">Predict</h1> -<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">or</span> <span class="hljs-keyword">replace</span> <span class="hljs-keyword">view</span> predict_scw1 -<span class="hljs-keyword">as</span> -<span class="hljs-keyword">select</span> - <span class="hljs-keyword">rowid</span>, - m.col0 <span class="hljs-keyword">as</span> score, - m.col1 <span class="hljs-keyword">as</span> label -<span class="hljs-keyword">from</span> ( -<span class="hljs-keyword">select</span> - <span class="hljs-keyword">rowid</span>, - maxrow(score, label) <span class="hljs-keyword">as</span> m -<span class="hljs-keyword">from</span> ( - <span class="hljs-keyword">select</span> - t.<span class="hljs-keyword">rowid</span>, - m.label, - <span class="hljs-keyword">sum</span>(m.weight * t.<span class="hljs-keyword">value</span>) <span class="hljs-keyword">as</span> score - <span class="hljs-keyword">from</span> - test20p_exploded t <span class="hljs-keyword">LEFT</span> <span class="hljs-keyword">OUTER</span> <span class="hljs-keyword">JOIN</span> - model_scw1 m <span class="hljs-keyword">ON</span> (t.feature = m.feature) - <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> 
- t.<span class="hljs-keyword">rowid</span>, m.label -) t1 -<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> <span class="hljs-keyword">rowid</span> -) t2; -</code></pre> -<h1 id="evaluation">Evaluation</h1> -<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">or</span> <span class="hljs-keyword">replace</span> <span class="hljs-keyword">view</span> eval_scw1 <span class="hljs-keyword">as</span> -<span class="hljs-keyword">select</span> - t.label <span class="hljs-keyword">as</span> actual, - p.label <span class="hljs-keyword">as</span> predicted -<span class="hljs-keyword">from</span> - test20p t <span class="hljs-keyword">JOIN</span> predict_scw1 p - <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>); - -<span class="hljs-keyword">select</span> <span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)/<span class="hljs-number">30</span> <span class="hljs-keyword">from</span> eval_scw1 -<span class="hljs-keyword">where</span> actual = predicted; -</code></pre> -<blockquote> -<p>0.9666666666666667</p> -</blockquote> <p><div id="page-footer" class="localized-footer"><hr><!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
See the NOTICE file @@ -2335,7 +2296,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Data preparation","level":"7.2.1","depth":2,"next":{"title":"SCW","level":"7.2.2","depth":2,"path":"multiclass/iris_scw.md","ref":"multiclass/iris_scw.md","articles":[]},"previous":{"title":"Iris tutorial","level":"7.2","depth":1,"path":"multiclass/iris.md","ref":"multiclass/iris.md","articles":[{"title":"Data preparation","level":"7.2.1","depth":2,"path":"multiclass/iris_dataset.md","ref":"multiclass/iris_dataset.md","articles":[]},{"title":"SCW","level":"7.2.2","depth":2,"path":"multiclass/iris_scw.md","ref":"multiclass/iris_scw.md","articles":[]},{"title":"RandomForest","level":"7.2.3","depth":2,"path":"multiclass/iris_randomforest.md","ref":"multiclass/iris_randomforest.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localiz 
ed-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["fac ebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/iri 
s_dataset.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-23T09:59:20.878Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Data preparation","level":"7.2.1","depth":2,"next":{"title":"SCW","level":"7.2.2","depth":2,"path":"multiclass/iris_scw.md","ref":"multiclass/iris_scw.md","articles":[]},"previous":{"title":"Iris tutorial","level":"7.2","depth":1,"path":"multiclass/iris.md","ref":"multiclass/iris.md","articles":[{"title":"Data preparation","level":"7.2.1","depth":2,"path":"multiclass/iris_dataset.md","ref":"multiclass/iris_dataset.md","articles":[]},{"title":"SCW","level":"7.2.2","depth":2,"path":"multiclass/iris_scw.md","ref":"multiclass/iris_scw.md","articles":[]},{"title":"Random Forest","level":"7.2.3","depth":2,"path":"multiclass/iris_randomforest.md","ref":"multiclass/iris_randomforest.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","locali 
zed-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["fa cebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/ir 
is_dataset.md","mtime":"2017-06-30T12:15:19.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-30T12:18:33.308Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/eb070238/userguide/multiclass/iris_randomforest.html ---------------------------------------------------------------------- diff --git a/userguide/multiclass/iris_randomforest.html b/userguide/multiclass/iris_randomforest.html index 4c08ba2..fa701ae 100644 --- a/userguide/multiclass/iris_randomforest.html +++ b/userguide/multiclass/iris_randomforest.html @@ -4,7 +4,7 @@ <head> <meta charset="UTF-8"> <meta content="text/html; charset=utf-8" http-equiv="Content-Type"> - <title>RandomForest · Hivemall User Manual</title> + <title>Random Forest · Hivemall User Manual</title> <meta http-equiv="X-UA-Compatible" content="IE=edge" /> <meta name="description" content=""> <meta name="generator" content="GitBook 3.2.2"> @@ -980,6 +980,21 @@ </li> + <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_rf.html"> + + <a href="../binaryclass/news20_rf.html"> + + + <b>6.3.5.</b> + + Random Forest + + </a> + + + + </li> + </ul> @@ -1324,7 +1339,7 @@ <b>7.2.3.</b> - RandomForest + Random Forest </a> @@ -2077,7 +2092,7 @@ <!-- Title --> <h1> <i class="fa fa-circle-o-notch fa-spin"></i> - <a href=".." >RandomForest</a> + <a href=".." 
>Random Forest</a> </h1> </div> @@ -2169,7 +2184,7 @@ $ sed '/^$/d' iris.data | hadoop fs -put - /dataset/iris/raw/iris.data <h1 id="training">Training</h1> <p><code>train_randomforest_classifier</code> takes a dense <code>features</code> in double[] and a <code>label</code> starting from 0.</p> <pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> <span class="hljs-keyword">model</span> -<span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> SEQUENCEFILE + <span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> SEQUENCEFILE <span class="hljs-keyword">AS</span> <span class="hljs-keyword">select</span> train_randomforest_classifier(features, label) @@ -2180,56 +2195,91 @@ $ sed '/^$/d' iris.data | hadoop fs -put - /dataset/iris/raw/iris.data <span class="hljs-keyword">from</span> training; </code></pre> -<p><em>Note: The default TEXTFILE should not be used for model table when using Javascript output through "-output javascript" option.</em></p> -<pre><code>hive> desc model; -model_id int -model_type int -pred_model string -var_importance array<double> -oob_errors int -oob_tests int -</code></pre><h2 id="training-options">Training options</h2> -<p>"-help" option shows usage of the function.</p> -<pre><code>select train_randomforest_classifier(features, label, "-help") from training; +<div class="panel panel-warning"><div class="panel-heading"><h3 class="panel-title" id="caution"><i class="fa fa-exclamation-triangle"></i> Caution</h3></div><div class="panel-body"><p>The default <code>TEXTFILE</code> should not be used for model table when using Javascript output through <code>-output javascript</code> option.</p></div></div> +<pre><code class="lang-sql">hive> desc extended model; +</code></pre> +<table> +<thead> +<tr> +<th style="text-align:center">col_name</th> +<th style="text-align:center">data_type </th> +</tr> +</thead> +<tbody> +<tr> +<td 
style="text-align:center">model_id</td> +<td style="text-align:center">string</td> +</tr> +<tr> +<td style="text-align:center">model_weight</td> +<td style="text-align:center">double</td> +</tr> +<tr> +<td style="text-align:center">model</td> +<td style="text-align:center">string</td> +</tr> +<tr> +<td style="text-align:center">var_importance</td> +<td style="text-align:center">array<double></double></td> +</tr> +<tr> +<td style="text-align:center">oob_errors</td> +<td style="text-align:center">int</td> +</tr> +<tr> +<td style="text-align:center">oob_tests</td> +<td style="text-align:center">int</td> +</tr> +</tbody> +</table> +<h2 id="training-options">Training options</h2> +<p><code>-help</code> option shows usage of the function.</p> +<pre><code class="lang-sql">select train_randomforest_classifier(features, label, "-help") from training; > FAILED: UDFArgumentException -usage: train_randomforest_classifier(double[] features, int label [, - string options]) - Returns a relation consists of <int model_id, - int model_type, string pred_model, array<double> var_importance, - int oob_errors, int oob_tests> [-attrs <arg>] [-depth <arg>] - [-disable_compression] [-help] [-leafs <arg>] [-output <arg>] - [-rule <arg>] [-seed <arg>] [-splits <arg>] [-trees <arg>] [-vars - <arg>] - -attrs,--attribute_types <arg> Comma separated attribute types (Q for - quantitative variable and C for - categorical variable. 
e.g., [Q,C,Q,C]) - -depth,--max_depth <arg> The maximum number of the tree depth - [default: Integer.MAX_VALUE] - -disable_compression Whether to disable compression of the - output script [default: false] - -help Show function help - -leafs,--max_leaf_nodes <arg> The maximum number of leaf nodes - [default: Integer.MAX_VALUE] - -output,--output_type <arg> The output type (serialization/ser or - opscode/vm or javascript/js) [default: - serialization] - -rule,--split_rule <arg> Split algorithm [default: GINI, ENTROPY] - -seed <arg> seed value in long [default: -1 - (random)] - -splits,--min_split <arg> A node that has greater than or equals - to `min_split` examples will split - [default: 2] - -trees,--num_trees <arg> The number of trees for each task - [default: 50] - -vars,--num_variables <arg> The number of random selected features - [default: ceil(sqrt(x[0].length))]. - int(num_variables * x[0].length) is - considered if num_variable is (0,1] -</code></pre><p><em>Caution: "-num_trees" controls the number of trees for each task, not the total number of trees.</em></p> +usage: train_randomforest_classifier(array<double|string> features, int + label [, const array<double> classWeights, const string options]) - + Returns a relation consists of <int model_id, int model_type, + string pred_model, array<double> var_importance, int oob_errors, + int oob_tests, double weight> [-attrs <arg>] [-depth <arg>] [-help] + [-leafs <arg>] [-min_samples_leaf <arg>] [-rule <arg>] [-seed + <arg>] [-splits <arg>] [-stratified] [-subsample <arg>] [-trees + <arg>] [-vars <arg>] + -attrs,--attribute_types <arg> Comma separated attribute types (Q + for quantitative variable and C for + categorical variable. 
e.g., + [Q,C,Q,C]) + -depth,--max_depth <arg> The maximum number of the tree depth + [default: Integer.MAX_VALUE] + -help Show function help + -leafs,--max_leaf_nodes <arg> The maximum number of leaf nodes + [default: Integer.MAX_VALUE] + -min_samples_leaf <arg> The minimum number of samples in a + leaf node [default: 1] + -rule,--split_rule <arg> Split algorithm [default: GINI, + ENTROPY] + -seed <arg> seed value in long [default: -1 + (random)] + -splits,--min_split <arg> A node that has greater than or + equals to `min_split` examples will + split [default: 2] + -stratified,--stratified_sampling Enable Stratified sampling for + unbalanced data + -subsample <arg> Sampling rate in range (0.0,1.0] + -trees,--num_trees <arg> The number of trees for each task + [default: 50] + -vars,--num_variables <arg> The number of random selected + features [default: + ceil(sqrt(x[0].length))]. + int(num_variables * x[0].length) is + considered if num_variable is (0,1 +</code></pre> +<div class="panel panel-warning"><div class="panel-heading"><h3 class="panel-title" id="caution"><i class="fa fa-exclamation-triangle"></i> Caution</h3></div><div class="panel-body"><p><code>-num_trees</code> controls the number of trees for each task, not the total number of trees.</p></div></div> <h3 id="parallelize-training">Parallelize Training</h3> <p>To parallelize RandomForest training, you can use UNION ALL as follows:</p> <pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> <span class="hljs-keyword">model</span> -<span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> SEQUENCEFILE + <span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> ORC tblproperties(<span class="hljs-string">"orc.compress"</span>=<span class="hljs-string">"SNAPPY"</span>) + <span class="hljs-comment">-- STORED AS SEQUENCEFILE </span> <span class="hljs-keyword">AS</span> <span class="hljs-keyword">select</span> 
train_randomforest_classifier(features, label, <span class="hljs-string">'-trees 25'</span>) @@ -2251,64 +2301,31 @@ usage: train_randomforest_classifier(double[] features, int label [, <span class="hljs-keyword">model</span>; </code></pre> <blockquote> -<p>[2.81010338879605,0.4970357753626371,23.790369091407698,14.315316390235273] 0.05333333333333334</p> +<p>[6.837674865013268,4.1317115752776665,24.331571871930226,25.677497925673062] 0.056666666666666664</p> </blockquote> -<h3 id="output-prediction-model-by-javascipt">Output prediction model by Javascipt</h3> -<pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> model_javascript -<span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> SEQUENCEFILE -<span class="hljs-keyword">AS</span> -<span class="hljs-keyword">select</span> train_randomforest_classifier(features, label, <span class="hljs-string">"-output_type js -disable_compression"</span>) -<span class="hljs-keyword">from</span> training; - -<span class="hljs-keyword">select</span> <span class="hljs-keyword">model</span> <span class="hljs-keyword">from</span> model_javascript <span class="hljs-keyword">limit</span> <span class="hljs-number">1</span>; -</code></pre> -<pre><code class="lang-js"><span class="hljs-keyword">if</span>(x[<span class="hljs-number">3</span>] <= <span class="hljs-number">0.5</span>) { - <span class="hljs-number">0</span>; -} <span class="hljs-keyword">else</span> { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">2</span>] <= <span class="hljs-number">4.5</span>) { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">3</span>] <= <span class="hljs-number">1.5</span>) { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">0</span>] <= <span class="hljs-number">4.5</span>) { - <span class="hljs-number">1</span>; - } <span class="hljs-keyword">else</span> { - <span class="hljs-keyword">if</span>(x[<span 
class="hljs-number">0</span>] <= <span class="hljs-number">5.5</span>) { - <span class="hljs-number">1</span>; - } <span class="hljs-keyword">else</span> { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">1</span>] <= <span class="hljs-number">2.5</span>) { - <span class="hljs-number">1</span>; - } <span class="hljs-keyword">else</span> { - <span class="hljs-number">1</span>; - } - } - } - } <span class="hljs-keyword">else</span> { - <span class="hljs-number">2</span>; - } - } <span class="hljs-keyword">else</span> { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">3</span>] <= <span class="hljs-number">1.5</span>) { - <span class="hljs-number">2</span>; - } <span class="hljs-keyword">else</span> { - <span class="hljs-number">2</span>; - } - } -} -</code></pre> <h1 id="prediction">Prediction</h1> <pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:classification=<span class="hljs-literal">true</span>; <span class="hljs-keyword">set</span> hive.<span class="hljs-keyword">auto</span>.<span class="hljs-keyword">convert</span>.<span class="hljs-keyword">join</span>=<span class="hljs-literal">true</span>; <span class="hljs-keyword">set</span> hive.mapjoin.optimized.hashtable=<span class="hljs-literal">false</span>; -<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> predicted_vm +<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> predicted <span class="hljs-keyword">as</span> <span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">rowid</span>, - rf_ensemble(predicted) <span class="hljs-keyword">as</span> predicted + <span class="hljs-comment">-- rf_ensemble(predicted) as predicted</span> + <span class="hljs-comment">-- hivemall v0.5-rc.1 or later</span> + rf_ensemble(predicted.<span class="hljs-keyword">value</span>, predicted.posteriori, model_weight) <span class="hljs-keyword">as</span> predicted + <span class="hljs-comment">-- 
rf_ensemble(predicted.value, predicted.posteriori) as predicted -- avoid OOB accuracy (i.e., model_weight)</span> <span class="hljs-keyword">FROM</span> ( <span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">rowid</span>, <span class="hljs-comment">-- hivemall v0.4.1-alpha.2 and before</span> <span class="hljs-comment">-- tree_predict(p.model, t.features, ${classification}) as predicted</span> <span class="hljs-comment">-- hivemall v0.4.1 and later</span> - tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) <span class="hljs-keyword">as</span> predicted + <span class="hljs-comment">-- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted</span> + <span class="hljs-comment">-- hivemall v0.5-rc.1 or later</span> + p.model_weight, + tree_predict(p.model_id, p.<span class="hljs-keyword">model</span>, t.features, ${classification}) <span class="hljs-keyword">as</span> predicted <span class="hljs-keyword">FROM</span> <span class="hljs-keyword">model</span> p <span class="hljs-keyword">LEFT</span> <span class="hljs-keyword">OUTER</span> <span class="hljs-keyword">JOIN</span> <span class="hljs-comment">-- CROSS JOIN</span> @@ -2318,7 +2335,6 @@ usage: train_randomforest_classifier(double[] features, int label [, <span class="hljs-keyword">rowid</span> ; </code></pre> -<p><em>Note: Javascript outputs can be evaluated by <code>js_tree_predict</code>.</em></p> <h3 id="parallelize-prediction">Parallelize Prediction</h3> <p>The following query runs predictions in N-parallel. 
It would reduce elapsed time for prediction almost by N.</p> <pre><code class="lang-sql"><span class="hljs-keyword">SET</span> hivevar:classification=<span class="hljs-literal">true</span>; @@ -2326,20 +2342,29 @@ usage: train_randomforest_classifier(double[] features, int label [, <span class="hljs-keyword">SET</span> hive.mapjoin.optimized.hashtable=<span class="hljs-literal">false</span>; <span class="hljs-keyword">SET</span> mapred.reduce.tasks=<span class="hljs-number">8</span>; -<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> predicted_vm +<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> predicted <span class="hljs-keyword">as</span> <span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">rowid</span>, - rf_ensemble(predicted) <span class="hljs-keyword">as</span> predicted + <span class="hljs-comment">-- rf_ensemble(predicted) as predicted</span> + <span class="hljs-comment">-- hivemall v0.5-rc.1 or later</span> + rf_ensemble(predicted.<span class="hljs-keyword">value</span>, predicted.posteriori, model_weight) <span class="hljs-keyword">as</span> predicted + <span class="hljs-comment">-- rf_ensemble(predicted.value, predicted.posteriori) as predicted -- avoid OOB accuracy (i.e., model_weight)</span> <span class="hljs-keyword">FROM</span> ( <span class="hljs-keyword">SELECT</span> t.<span class="hljs-keyword">rowid</span>, <span class="hljs-comment">-- hivemall v0.4.1-alpha.2 and before</span> <span class="hljs-comment">-- tree_predict(p.pred_model, t.features, ${classification}) as predicted</span> <span class="hljs-comment">-- hivemall v0.4.1 and later</span> - tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) <span class="hljs-keyword">as</span> predicted + <span class="hljs-comment">-- tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) as predicted</span> + <span class="hljs-comment">-- hivemall v0.5-rc.1 or 
later</span> + p.model_weight, + tree_predict(p.model_id, p.<span class="hljs-keyword">model</span>, t.features, ${classification}) <span class="hljs-keyword">as</span> predicted <span class="hljs-keyword">FROM</span> ( - <span class="hljs-keyword">SELECT</span> model_id, model_type, pred_model + <span class="hljs-keyword">SELECT</span> + <span class="hljs-comment">-- model_id, model_type, pred_model</span> + <span class="hljs-comment">-- hivemall v0.5-rc.1 or later</span> + model_id, model_weight, <span class="hljs-keyword">model</span> <span class="hljs-keyword">FROM</span> <span class="hljs-keyword">model</span> <span class="hljs-keyword">DISTRIBUTE</span> <span class="hljs-keyword">BY</span> <span class="hljs-keyword">rand</span>(<span class="hljs-number">1</span>) ) p @@ -2350,32 +2375,92 @@ usage: train_randomforest_classifier(double[] features, int label [, ; </code></pre> <h1 id="evaluation">Evaluation</h1> -<pre><code class="lang-sql">select count(1) from training; -> 150 - -set hivevar:total_cnt=150; +<pre><code class="lang-sql"><span class="hljs-keyword">select</span> <span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">from</span> training; +</code></pre> +<blockquote> +<p>150</p> +</blockquote> +<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:total_cnt=<span class="hljs-number">150</span>; WITH t1 as ( -SELECT - t.rowid, - t.label as actual, - p.predicted.label as predicted -FROM - predicted_vm p - LEFT OUTER JOIN training t ON (t.rowid = p.rowid) +<span class="hljs-keyword">SELECT</span> + t.<span class="hljs-keyword">rowid</span>, + t.label <span class="hljs-keyword">as</span> actual, + p.predicted.label <span class="hljs-keyword">as</span> predicted +<span class="hljs-keyword">FROM</span> + predicted p + <span class="hljs-keyword">LEFT</span> <span class="hljs-keyword">OUTER</span> <span class="hljs-keyword">JOIN</span> training t <span class="hljs-keyword">ON</span> 
(t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>) ) -SELECT - count(1) / ${total_cnt} -FROM +<span class="hljs-keyword">SELECT</span> + <span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) / ${total_cnt} +<span class="hljs-keyword">FROM</span> t1 -WHERE +<span class="hljs-keyword">WHERE</span> actual = predicted ; </code></pre> <blockquote> -<p>0.9533333333333334</p> +<p>0.98</p> </blockquote> -<p><div id="page-footer" class="localized-footer"><hr><!-- +<h1 id="graphvis-export">Graphviz export</h1> +<div class="panel panel-primary"><div class="panel-heading"><h3 class="panel-title" id="note"><i class="fa fa-edit"></i> Note</h3></div><div class="panel-body"><p>The <code>tree_export</code> feature is supported in Hivemall v0.5-rc.1 or later. +It is better to limit the tree depth at training time with the <code>-depth</code> option when you want to plot a Decision Tree.</p></div></div> +<p>Hivemall provides <code>tree_export</code> to export a decision tree into <a href="http://www.graphviz.org/" target="_blank">Graphviz</a> or a human-readable JavaScript format. 
You can find the usage by issuing the following query:</p> +<pre><code>> select tree_export("","-help"); + +usage: tree_export(string model, const string options, optional + array<string> featureNames=null, optional array<string> + classNames=null) - exports a Decision Tree model as javascript/dot] + [-help] [-output_name <arg>] [-r] [-t <arg>] + -help Show function help + -output_name,--outputName <arg> output name [default: predicted] + -r,--regression Is regression tree or not + -t,--type <arg> Type of output [default: js, + javascript/js, graphvis/dot +</code></pre><pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> model_exported + <span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> ORC tblproperties(<span class="hljs-string">"orc.compress"</span>=<span class="hljs-string">"SNAPPY"</span>) +<span class="hljs-keyword">AS</span> +<span class="hljs-keyword">select</span> + model_id, + tree_export(<span class="hljs-keyword">model</span>, <span class="hljs-string">"-type javascript"</span>, <span class="hljs-built_in">array</span>(<span class="hljs-string">'sepal_length'</span>,<span class="hljs-string">'sepal_width'</span>,<span class="hljs-string">'petal_length'</span>,<span class="hljs-string">'petak_width'</span>), <span class="hljs-built_in">array</span>(<span class="hljs-string">'Setosa'</span>,<span class="hljs-string">'Versicolour'</span>,<span class="hljs-string">'Virginica'</span>)) <span class="hljs-keyword">as</span> js, + tree_export(<span class="hljs-keyword">model</span>, <span class="hljs-string">"-type graphvis"</span>, <span class="hljs-built_in">array</span>(<span class="hljs-string">'sepal_length'</span>,<span class="hljs-string">'sepal_width'</span>,<span class="hljs-string">'petal_length'</span>,<span class="hljs-string">'petak_width'</span>), <span class="hljs-built_in">array</span>(<span class="hljs-string">'Setosa'</span>,<span 
class="hljs-string">'Versicolour'</span>,<span class="hljs-string">'Virginica'</span>)) <span class="hljs-keyword">as</span> dot +<span class="hljs-keyword">from</span> + <span class="hljs-keyword">model</span> +<span class="hljs-comment">-- limit 1</span> +; +</code></pre> +<pre><code>digraph Tree { + node [shape=box, style="filled, rounded", color="black", fontname=helvetica]; + edge [fontname=helvetica]; + 0 [label=<petal_length &le; 2.599999964237213>, fillcolor="#00000000"]; + 1 [label=<predicted = Setosa>, fillcolor="0.0000,1.000,1.000", shape=ellipse]; + 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"]; + 2 [label=<petal_length &le; 4.950000047683716>, fillcolor="#00000000"]; + 0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"]; + 3 [label=<petak_width &le; 1.6500000357627869>, fillcolor="#00000000"]; + 2 -> 3; + 4 [label=<predicted = Versicolour>, fillcolor="0.3333,1.000,1.000", shape=ellipse]; + 3 -> 4; + 5 [label=<sepal_width &le; 3.100000023841858>, fillcolor="#00000000"]; + 3 -> 5; + 6 [label=<predicted = Virginica>, fillcolor="0.6667,1.000,1.000", shape=ellipse]; + 5 -> 6; + 7 [label=<predicted = Versicolour>, fillcolor="0.3333,1.000,1.000", shape=ellipse]; + 5 -> 7; + 8 [label=<petak_width &le; 1.75>, fillcolor="#00000000"]; + 2 -> 8; + 9 [label=<petal_length &le; 5.299999952316284>, fillcolor="#00000000"]; + 8 -> 9; + 10 [label=<predicted = Versicolour>, fillcolor="0.3333,1.000,1.000", shape=ellipse]; + 9 -> 10; + 11 [label=<predicted = Virginica>, fillcolor="0.6667,1.000,1.000", shape=ellipse]; + 9 -> 11; + 12 [label=<predicted = Virginica>, fillcolor="0.6667,1.000,1.000", shape=ellipse]; + 8 -> 12; +} +</code></pre><p><img src="../resources/images/iris.png" alt="Iris Graphvis output"></p> +<p>You can draw a graph by <code>dot -Tpng iris.dot -o iris.png</code> or using <a href="http://viz-js.com/" target="_blank">Viz.js</a>. 
+<div id="page-footer" class="localized-footer"><hr><!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -2430,7 +2515,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"RandomForest","level":"7.2.3","depth":2,"next":{"title":"Regression","level":"8.1","depth":1,"path":"regression/general.md","ref":"regression/general.md","articles":[]},"previous":{"title":"SCW","level":"7.2.2","depth":2,"path":"multiclass/iris_scw.md","ref":"multiclass/iris_scw.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"bas 
e":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5 "},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/iris_randomforest.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-23T09:59:20.878Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Random 
Forest","level":"7.2.3","depth":2,"next":{"title":"Regression","level":"8.1","depth":1,"path":"regression/general.md","ref":"regression/general.md","articles":[]},"previous":{"title":"SCW","level":"7.2.2","depth":2,"path":"multiclass/iris_scw.md","ref":"multiclass/iris_scw.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"ba se":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h 
5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/iris_randomforest.md","mtime":"2017-06-30T12:15:19.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-30T12:18:33.308Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/eb070238/userguide/multiclass/iris_scw.html ---------------------------------------------------------------------- diff --git a/userguide/multiclass/iris_scw.html b/userguide/multiclass/iris_scw.html index f4dd58d..2ba6769 100644 --- a/userguide/multiclass/iris_scw.html +++ b/userguide/multiclass/iris_scw.html @@ -980,6 +980,21 @@ </li> + <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_rf.html"> + + <a href="../binaryclass/news20_rf.html"> + + + <b>6.3.5.</b> + + Random Forest + + </a> + + + + </li> + </ul> @@ -1324,7 +1339,7 @@ <b>7.2.3.</b> - RandomForest + Random Forest </a> @@ -2110,271 +2125,59 @@ specific language governing permissions and limitations under the License. --> -<p><em>NOTE: RandomForest is being supported from Hivemall v0.4 or later.</em></p> -<h1 id="dataset">Dataset</h1> -<ul> -<li><a href="https://archive.ics.uci.edu/ml/datasets/Iris" target="_blank">https://archive.ics.uci.edu/ml/datasets/Iris</a></li> -</ul> -<pre><code>Attribute Information: - 1. sepal length in cm - 2. sepal width in cm - 3. petal length in cm - 4. petal width in cm - 5. 
class: - -- Iris Setosa - -- Iris Versicolour - -- Iris Virginica -</code></pre><h1 id="table-preparation">Table preparation</h1> -<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">database</span> iris; -<span class="hljs-keyword">use</span> iris; - -<span class="hljs-keyword">create</span> <span class="hljs-keyword">external</span> <span class="hljs-keyword">table</span> <span class="hljs-keyword">raw</span> ( - sepal_length <span class="hljs-built_in">int</span>, - sepal_width <span class="hljs-built_in">int</span>, - petal_length <span class="hljs-built_in">int</span>, - petak_width <span class="hljs-built_in">int</span>, - <span class="hljs-keyword">class</span> <span class="hljs-keyword">string</span> -) -<span class="hljs-keyword">ROW</span> <span class="hljs-keyword">FORMAT</span> <span class="hljs-keyword">DELIMITED</span> - <span class="hljs-keyword">FIELDS</span> <span class="hljs-keyword">TERMINATED</span> <span class="hljs-keyword">BY</span> <span class="hljs-string">','</span> - <span class="hljs-keyword">LINES</span> <span class="hljs-keyword">TERMINATED</span> <span class="hljs-keyword">BY</span> <span class="hljs-string">'\n'</span> -<span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> TEXTFILE LOCATION <span class="hljs-string">'/dataset/iris/raw'</span>; - -$ sed '/^$/d' iris.data | hadoop fs -put - /dataset/iris/raw/iris.data -</code></pre> -<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> label_mapping -<span class="hljs-keyword">as</span> -<span class="hljs-keyword">select</span> - <span class="hljs-keyword">class</span>, - <span class="hljs-keyword">rank</span> - <span class="hljs-number">1</span> <span class="hljs-keyword">as</span> label -<span class="hljs-keyword">from</span> ( -<span class="hljs-keyword">select</span> - <span class="hljs-keyword">distinct</span> <span class="hljs-keyword">class</span>, - <span 
class="hljs-keyword">dense_rank</span>() <span class="hljs-keyword">over</span> (<span class="hljs-keyword">order</span> <span class="hljs-keyword">by</span> <span class="hljs-keyword">class</span>) <span class="hljs-keyword">as</span> <span class="hljs-keyword">rank</span> +<h1 id="training-multiclass-classification">Training (multiclass classification)</h1> +<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> model_scw1 <span class="hljs-keyword">as</span> +<span class="hljs-keyword">select</span> + label, + feature, + argmin_kld(weight, covar) <span class="hljs-keyword">as</span> weight <span class="hljs-keyword">from</span> - <span class="hljs-keyword">raw</span> -) t -; + (<span class="hljs-keyword">select</span> + train_multiclass_scw(features, label) <span class="hljs-keyword">as</span> (label, feature, weight, covar) + <span class="hljs-keyword">from</span> + training_x10 + ) t +<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> label, feature; </code></pre> -<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> training +<h1 id="predict">Predict</h1> +<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">or</span> <span class="hljs-keyword">replace</span> <span class="hljs-keyword">view</span> predict_scw1 <span class="hljs-keyword">as</span> -<span class="hljs-keyword">select</span> - <span class="hljs-keyword">rowid</span>() <span class="hljs-keyword">as</span> <span class="hljs-keyword">rowid</span>, - <span class="hljs-built_in">array</span>(t1.sepal_length, t1.sepal_width, t1.petal_length, t1.petak_width) <span class="hljs-keyword">as</span> features, - t2.label -<span class="hljs-keyword">from</span> - <span class="hljs-keyword">raw</span> t1 - <span class="hljs-keyword">JOIN</span> label_mapping t2 <span class="hljs-keyword">ON</span> (t1.<span 
class="hljs-keyword">class</span> = t2.<span class="hljs-keyword">class</span>) -; -</code></pre> -<h1 id="training">Training</h1> -<p><code>train_randomforest_classifier</code> takes a dense <code>features</code> in double[] and a <code>label</code> starting from 0.</p> -<pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> <span class="hljs-keyword">model</span> -<span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> SEQUENCEFILE -<span class="hljs-keyword">AS</span> <span class="hljs-keyword">select</span> - train_randomforest_classifier(features, label) - <span class="hljs-comment">-- hivemall v0.4.1-alpha.2 and before</span> - <span class="hljs-comment">-- train_randomforest_classifier(features, label) as (pred_model, var_importance, oob_errors, oob_tests)</span> - <span class="hljs-comment">-- hivemall v0.4.1 and later</span> - <span class="hljs-comment">-- train_randomforest_classifier(features, label) as (model_id, model_type, pred_model, var_importance, oob_errors, oob_tests)</span> -<span class="hljs-keyword">from</span> - training; -</code></pre> -<p><em>Note: The default TEXTFILE should not be used for model table when using Javascript output through "-output javascript" option.</em></p> -<pre><code>hive> desc model; -model_id int -model_type int -pred_model string -var_importance array<double> -oob_errors int -oob_tests int -</code></pre><h2 id="training-options">Training options</h2> -<p>"-help" option shows usage of the function.</p> -<pre><code>select train_randomforest_classifier(features, label, "-help") from training; - -> FAILED: UDFArgumentException -usage: train_randomforest_classifier(double[] features, int label [, - string options]) - Returns a relation consists of <int model_id, - int model_type, string pred_model, array<double> var_importance, - int oob_errors, int oob_tests> [-attrs <arg>] [-depth <arg>] - [-disable_compression] [-help] [-leafs <arg>] [-output 
<arg>] - [-rule <arg>] [-seed <arg>] [-splits <arg>] [-trees <arg>] [-vars - <arg>] - -attrs,--attribute_types <arg> Comma separated attribute types (Q for - quantitative variable and C for - categorical variable. e.g., [Q,C,Q,C]) - -depth,--max_depth <arg> The maximum number of the tree depth - [default: Integer.MAX_VALUE] - -disable_compression Whether to disable compression of the - output script [default: false] - -help Show function help - -leafs,--max_leaf_nodes <arg> The maximum number of leaf nodes - [default: Integer.MAX_VALUE] - -output,--output_type <arg> The output type (serialization/ser or - opscode/vm or javascript/js) [default: - serialization] - -rule,--split_rule <arg> Split algorithm [default: GINI, ENTROPY] - -seed <arg> seed value in long [default: -1 - (random)] - -splits,--min_split <arg> A node that has greater than or equals - to `min_split` examples will split - [default: 2] - -trees,--num_trees <arg> The number of trees for each task - [default: 50] - -vars,--num_variables <arg> The number of random selected features - [default: ceil(sqrt(x[0].length))]. 
- int(num_variables * x[0].length) is - considered if num_variable is (0,1] -</code></pre><p><em>Caution: "-num_trees" controls the number of trees for each task, not the total number of trees.</em></p> -<h3 id="parallelize-training">Parallelize Training</h3> -<p>To parallelize RandomForest training, you can use UNION ALL as follows:</p> -<pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> <span class="hljs-keyword">model</span> -<span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> SEQUENCEFILE -<span class="hljs-keyword">AS</span> -<span class="hljs-keyword">select</span> - train_randomforest_classifier(features, label, <span class="hljs-string">'-trees 25'</span>) -<span class="hljs-keyword">from</span> - training -<span class="hljs-keyword">UNION</span> ALL -<span class="hljs-keyword">select</span> - train_randomforest_classifier(features, label, <span class="hljs-string">'-trees 25'</span>) -<span class="hljs-keyword">from</span> - training -; -</code></pre> -<h3 id="learning-stats">Learning stats</h3> -<p><a href="https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm#varimp" target="_blank"><code>Variable importance</code></a> and <a href="https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm#ooberr" target="_blank"><code>Out Of Bag (OOB) error rate</code></a> of RandomForest can be shown as follows:</p> -<pre><code class="lang-sql"><span class="hljs-keyword">select</span> - array_sum(var_importance) <span class="hljs-keyword">as</span> var_importance, - <span class="hljs-keyword">sum</span>(oob_errors) / <span class="hljs-keyword">sum</span>(oob_tests) <span class="hljs-keyword">as</span> oob_err_rate -<span class="hljs-keyword">from</span> - <span class="hljs-keyword">model</span>; -</code></pre> -<blockquote> -<p>[2.81010338879605,0.4970357753626371,23.790369091407698,14.315316390235273] 0.05333333333333334</p> -</blockquote> -<h3 
id="output-prediction-model-by-javascipt">Output prediction model by Javascipt</h3> -<pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> model_javascript -<span class="hljs-keyword">STORED</span> <span class="hljs-keyword">AS</span> SEQUENCEFILE -<span class="hljs-keyword">AS</span> -<span class="hljs-keyword">select</span> train_randomforest_classifier(features, label, <span class="hljs-string">"-output_type js -disable_compression"</span>) -<span class="hljs-keyword">from</span> training; - -<span class="hljs-keyword">select</span> <span class="hljs-keyword">model</span> <span class="hljs-keyword">from</span> model_javascript <span class="hljs-keyword">limit</span> <span class="hljs-number">1</span>; -</code></pre> -<pre><code class="lang-js"><span class="hljs-keyword">if</span>(x[<span class="hljs-number">3</span>] <= <span class="hljs-number">0.5</span>) { - <span class="hljs-number">0</span>; -} <span class="hljs-keyword">else</span> { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">2</span>] <= <span class="hljs-number">4.5</span>) { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">3</span>] <= <span class="hljs-number">1.5</span>) { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">0</span>] <= <span class="hljs-number">4.5</span>) { - <span class="hljs-number">1</span>; - } <span class="hljs-keyword">else</span> { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">0</span>] <= <span class="hljs-number">5.5</span>) { - <span class="hljs-number">1</span>; - } <span class="hljs-keyword">else</span> { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">1</span>] <= <span class="hljs-number">2.5</span>) { - <span class="hljs-number">1</span>; - } <span class="hljs-keyword">else</span> { - <span class="hljs-number">1</span>; - } - } - } - } <span class="hljs-keyword">else</span> { - <span class="hljs-number">2</span>; - } - 
} <span class="hljs-keyword">else</span> { - <span class="hljs-keyword">if</span>(x[<span class="hljs-number">3</span>] <= <span class="hljs-number">1.5</span>) { - <span class="hljs-number">2</span>; - } <span class="hljs-keyword">else</span> { - <span class="hljs-number">2</span>; - } - } -} -</code></pre> -<h1 id="prediction">Prediction</h1> -<pre><code class="lang-sql"><span class="hljs-keyword">set</span> hivevar:classification=<span class="hljs-literal">true</span>; -<span class="hljs-keyword">set</span> hive.<span class="hljs-keyword">auto</span>.<span class="hljs-keyword">convert</span>.<span class="hljs-keyword">join</span>=<span class="hljs-literal">true</span>; -<span class="hljs-keyword">set</span> hive.mapjoin.optimized.hashtable=<span class="hljs-literal">false</span>; - -<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> predicted_vm -<span class="hljs-keyword">as</span> -<span class="hljs-keyword">SELECT</span> - <span class="hljs-keyword">rowid</span>, - rf_ensemble(predicted) <span class="hljs-keyword">as</span> predicted -<span class="hljs-keyword">FROM</span> ( - <span class="hljs-keyword">SELECT</span> - <span class="hljs-keyword">rowid</span>, - <span class="hljs-comment">-- hivemall v0.4.1-alpha.2 and before</span> - <span class="hljs-comment">-- tree_predict(p.model, t.features, ${classification}) as predicted</span> - <span class="hljs-comment">-- hivemall v0.4.1 and later</span> - tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) <span class="hljs-keyword">as</span> predicted - <span class="hljs-keyword">FROM</span> - <span class="hljs-keyword">model</span> p - <span class="hljs-keyword">LEFT</span> <span class="hljs-keyword">OUTER</span> <span class="hljs-keyword">JOIN</span> <span class="hljs-comment">-- CROSS JOIN</span> - training t -) t1 -<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> - <span class="hljs-keyword">rowid</span> -; 
-</code></pre> -<p><em>Note: Javascript outputs can be evaluated by <code>js_tree_predict</code>.</em></p> -<h3 id="parallelize-prediction">Parallelize Prediction</h3> -<p>The following query runs predictions in N-parallel. It would reduce elapsed time for prediction almost by N.</p> -<pre><code class="lang-sql"><span class="hljs-keyword">SET</span> hivevar:classification=<span class="hljs-literal">true</span>; -<span class="hljs-keyword">set</span> hive.<span class="hljs-keyword">auto</span>.<span class="hljs-keyword">convert</span>.<span class="hljs-keyword">join</span>=<span class="hljs-literal">true</span>; -<span class="hljs-keyword">SET</span> hive.mapjoin.optimized.hashtable=<span class="hljs-literal">false</span>; -<span class="hljs-keyword">SET</span> mapred.reduce.tasks=<span class="hljs-number">8</span>; - -<span class="hljs-keyword">create</span> <span class="hljs-keyword">table</span> predicted_vm -<span class="hljs-keyword">as</span> -<span class="hljs-keyword">SELECT</span> - <span class="hljs-keyword">rowid</span>, - rf_ensemble(predicted) <span class="hljs-keyword">as</span> predicted -<span class="hljs-keyword">FROM</span> ( - <span class="hljs-keyword">SELECT</span> - t.<span class="hljs-keyword">rowid</span>, - <span class="hljs-comment">-- hivemall v0.4.1-alpha.2 and before</span> - <span class="hljs-comment">-- tree_predict(p.pred_model, t.features, ${classification}) as predicted</span> - <span class="hljs-comment">-- hivemall v0.4.1 and later</span> - tree_predict(p.model_id, p.model_type, p.pred_model, t.features, ${classification}) <span class="hljs-keyword">as</span> predicted - <span class="hljs-keyword">FROM</span> ( - <span class="hljs-keyword">SELECT</span> model_id, model_type, pred_model - <span class="hljs-keyword">FROM</span> <span class="hljs-keyword">model</span> - <span class="hljs-keyword">DISTRIBUTE</span> <span class="hljs-keyword">BY</span> <span class="hljs-keyword">rand</span>(<span class="hljs-number">1</span>) - ) p - 
<span class="hljs-keyword">LEFT</span> <span class="hljs-keyword">OUTER</span> <span class="hljs-keyword">JOIN</span> training t + <span class="hljs-keyword">rowid</span>, + m.col0 <span class="hljs-keyword">as</span> score, + m.col1 <span class="hljs-keyword">as</span> label +<span class="hljs-keyword">from</span> ( +<span class="hljs-keyword">select</span> + <span class="hljs-keyword">rowid</span>, + maxrow(score, label) <span class="hljs-keyword">as</span> m +<span class="hljs-keyword">from</span> ( + <span class="hljs-keyword">select</span> + t.<span class="hljs-keyword">rowid</span>, + m.label, + <span class="hljs-keyword">sum</span>(m.weight * t.<span class="hljs-keyword">value</span>) <span class="hljs-keyword">as</span> score + <span class="hljs-keyword">from</span> + test20p_exploded t <span class="hljs-keyword">LEFT</span> <span class="hljs-keyword">OUTER</span> <span class="hljs-keyword">JOIN</span> + model_scw1 m <span class="hljs-keyword">ON</span> (t.feature = m.feature) + <span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> + t.<span class="hljs-keyword">rowid</span>, m.label ) t1 -<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> - <span class="hljs-keyword">rowid</span> -; +<span class="hljs-keyword">group</span> <span class="hljs-keyword">by</span> <span class="hljs-keyword">rowid</span> +) t2; </code></pre> <h1 id="evaluation">Evaluation</h1> -<pre><code class="lang-sql">select count(1) from training; -> 150 - -set hivevar:total_cnt=150; - -WITH t1 as ( -SELECT - t.rowid, - t.label as actual, - p.predicted.label as predicted -FROM - predicted_vm p - LEFT OUTER JOIN training t ON (t.rowid = p.rowid) -) -SELECT - count(1) / ${total_cnt} -FROM - t1 -WHERE - actual = predicted -; +<pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">or</span> <span class="hljs-keyword">replace</span> <span class="hljs-keyword">view</span> eval_scw1 <span 
class="hljs-keyword">as</span> +<span class="hljs-keyword">select</span> + t.label <span class="hljs-keyword">as</span> actual, + p.label <span class="hljs-keyword">as</span> predicted +<span class="hljs-keyword">from</span> + test20p t <span class="hljs-keyword">JOIN</span> predict_scw1 p + <span class="hljs-keyword">on</span> (t.<span class="hljs-keyword">rowid</span> = p.<span class="hljs-keyword">rowid</span>); + +<span class="hljs-keyword">select</span> <span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>)/<span class="hljs-number">30</span> <span class="hljs-keyword">from</span> eval_scw1 +<span class="hljs-keyword">where</span> actual = predicted; </code></pre> <blockquote> -<p>0.9533333333333334</p> +<p>0.9666666666666667</p> </blockquote> <p><div id="page-footer" class="localized-footer"><hr><!-- Licensed to the Apache Software Foundation (ASF) under one @@ -2431,7 +2234,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"SCW","level":"7.2.2","depth":2,"next":{"title":"RandomForest","level":"7.2.3","depth":2,"path":"multiclass/iris_randomforest.md","ref":"multiclass/iris_randomforest.md","articles":[]},"previous":{"title":"Data 
preparation","level":"7.2.1","depth":2,"path":"multiclass/iris_dataset.md","ref":"multiclass/iris_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter ":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selecto r":"h1,h2,h3,*:not(.callout) > 
h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/iris_scw.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-23T09:59:20.878Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"SCW","level":"7.2.2","depth":2,"next":{"title":"Random Forest","level":"7.2.3","depth":2,"path":"multiclass/iris_randomforest.md","ref":"multiclass/iris_randomforest.md","articles":[]},"previous":{"title":"Data preparation","level":"7.2.1","depth":2,"path":"multiclass/iris_dataset.md","ref":"multiclass/iris_dataset.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitte 
r":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"select or":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/iris_scw.md","mtime":"2017-06-30T12:15:19.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-30T12:18:33.308Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/eb070238/userguide/multiclass/news20.html ---------------------------------------------------------------------- 
diff --git a/userguide/multiclass/news20.html b/userguide/multiclass/news20.html index 3cf7325..2537e6c 100644 --- a/userguide/multiclass/news20.html +++ b/userguide/multiclass/news20.html @@ -980,6 +980,21 @@ </li> + <li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_rf.html"> + + <a href="../binaryclass/news20_rf.html"> + + + <b>6.3.5.</b> + + Random Forest + + </a> + + + + </li> + </ul> @@ -1324,7 +1339,7 @@ <b>7.2.3.</b> - RandomForest + Random Forest </a> @@ -2165,7 +2180,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"News20 Multiclass tutorial","level":"7.1","depth":1,"next":{"title":"Data preparation","level":"7.1.1","depth":2,"path":"multiclass/news20_dataset.md","ref":"multiclass/news20_dataset.md","articles":[]},"previous":{"title":"Kaggle Titanic tutorial","level":"6.7","depth":1,"path":"binaryclass/titanic_rf.md","ref":"binaryclass/titanic_rf.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator 
-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true },"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/news20.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-23T09:59:20.878Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"News20 Multiclass tutorial","level":"7.1","depth":1,"next":{"title":"Data 
preparation","level":"7.1.1","depth":2,"path":"multiclass/news20_dataset.md","ref":"multiclass/news20_dataset.md","articles":[]},"previous":{"title":"Kaggle Titanic tutorial","level":"6.7","depth":1,"path":"binaryclass/titanic_rf.md","ref":"binaryclass/titanic_rf.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator -hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true },"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > 
h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"multiclass/news20.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-06-30T12:18:33.308Z"},"basePath":"..","book":{"language":""}}); }); </script> </div>
