http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/a98b42f8/userguide/ft_engineering/hashing.html ---------------------------------------------------------------------- diff --git a/userguide/ft_engineering/hashing.html b/userguide/ft_engineering/hashing.html index cae87ab..0331934 100644 --- a/userguide/ft_engineering/hashing.html +++ b/userguide/ft_engineering/hashing.html @@ -244,7 +244,7 @@ <b>1.3.1.</b> - Explicit addBias() for better prediction + Explicit add_bias() for better prediction </a> @@ -707,14 +707,14 @@ - <li class="chapter " data-level="4.1" data-path="../eval/stat_eval.html"> + <li class="chapter " data-level="4.1" data-path="../eval/binary_classification_measures.html"> - <a href="../eval/stat_eval.html"> + <a href="../eval/binary_classification_measures.html"> <b>4.1.</b> - Statistical evaluation of a prediction model + Binary Classification Metrics </a> @@ -743,13 +743,43 @@ </li> - <li class="chapter " data-level="4.2" data-path="../eval/rank.html"> + <li class="chapter " data-level="4.2" data-path="../eval/multilabel_classification_measures.html"> - <a href="../eval/rank.html"> + <a href="../eval/multilabel_classification_measures.html"> <b>4.2.</b> + Multi-label Classification Metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.3" data-path="../eval/regression.html"> + + <a href="../eval/regression.html"> + + + <b>4.3.</b> + + Regression metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.4" data-path="../eval/rank.html"> + + <a href="../eval/rank.html"> + + + <b>4.4.</b> + Ranking Measures </a> @@ -758,12 +788,12 @@ </li> - <li class="chapter " data-level="4.3" data-path="../eval/datagen.html"> + <li class="chapter " data-level="4.5" data-path="../eval/datagen.html"> <a href="../eval/datagen.html"> - <b>4.3.</b> + <b>4.5.</b> Data Generation @@ -774,12 +804,12 @@ <ul class="articles"> - <li class="chapter " data-level="4.3.1" data-path="../eval/lr_datagen.html"> + <li class="chapter " data-level="4.5.1" data-path="../eval/lr_datagen.html"> <a href="../eval/lr_datagen.html"> - <b>4.3.1.</b> + <b>4.5.1.</b> Logistic Regression data generation @@ -2321,7 +2351,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Feature Hashing","level":"3.2","depth":1,"next":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft_engineering/selection.md","articles":[]},"previous":{"title":"Feature Scaling","level":"3.1","depth":1,"path":"ft_engineering/scaling.md","ref":"ft_engineering/scaling.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter ":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selecto r":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/hashing.md","mtime":"2017-07-05T09:10:51.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-07-14T17:59:22.591Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Feature Hashing","level":"3.2","depth":1,"next":{"title":"Feature Selection","level":"3.3","depth":1,"path":"ft_engineering/selection.md","ref":"ft_engineering/selection.md","articles":[]},"previous":{"title":"Feature Scaling","level":"3.1","depth":1,"path":"ft_engineering/scaling.md","ref":"ft_engineering/scaling.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter ":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selecto r":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/hashing.md","mtime":"2017-07-20T09:43:22.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-09-13T14:07:31.053Z"},"basePath":"..","book":{"language":""}}); }); </script> </div>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/a98b42f8/userguide/ft_engineering/pairing.html ---------------------------------------------------------------------- diff --git a/userguide/ft_engineering/pairing.html b/userguide/ft_engineering/pairing.html index f5cf044..23a29c7 100644 --- a/userguide/ft_engineering/pairing.html +++ b/userguide/ft_engineering/pairing.html @@ -244,7 +244,7 @@ <b>1.3.1.</b> - Explicit addBias() for better prediction + Explicit add_bias() for better prediction </a> @@ -707,14 +707,14 @@ - <li class="chapter " data-level="4.1" data-path="../eval/stat_eval.html"> + <li class="chapter " data-level="4.1" data-path="../eval/binary_classification_measures.html"> - <a href="../eval/stat_eval.html"> + <a href="../eval/binary_classification_measures.html"> <b>4.1.</b> - Statistical evaluation of a prediction model + Binary Classification Metrics </a> @@ -743,13 +743,43 @@ </li> - <li class="chapter " data-level="4.2" data-path="../eval/rank.html"> + <li class="chapter " data-level="4.2" data-path="../eval/multilabel_classification_measures.html"> - <a href="../eval/rank.html"> + <a href="../eval/multilabel_classification_measures.html"> <b>4.2.</b> + Multi-label Classification Metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.3" data-path="../eval/regression.html"> + + <a href="../eval/regression.html"> + + + <b>4.3.</b> + + Regression metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.4" data-path="../eval/rank.html"> + + <a href="../eval/rank.html"> + + + <b>4.4.</b> + Ranking Measures </a> @@ -758,12 +788,12 @@ </li> - <li class="chapter " data-level="4.3" data-path="../eval/datagen.html"> + <li class="chapter " data-level="4.5" data-path="../eval/datagen.html"> <a href="../eval/datagen.html"> - <b>4.3.</b> + <b>4.5.</b> Data Generation @@ -774,12 +804,12 @@ <ul class="articles"> - <li class="chapter " data-level="4.3.1" data-path="../eval/lr_datagen.html"> + <li class="chapter " data-level="4.5.1" data-path="../eval/lr_datagen.html"> <a href="../eval/lr_datagen.html"> - <b>4.3.1.</b> + <b>4.5.1.</b> Logistic Regression data generation @@ -2210,7 +2240,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"FEATURE PAIRING","level":"3.5","depth":1,"next":{"title":"Polynomial Features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]},"previous":{"title":"Feature Binning","level":"3.4","depth":1,"path":"ft_engineering/binning.md","ref":"ft_engineering/binning.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"sp litter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"s elector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/pairing.md","mtime":"2017-06-23T09:56:22.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-07-14T17:59:22.591Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"FEATURE PAIRING","level":"3.5","depth":1,"next":{"title":"Polynomial Features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]},"previous":{"title":"Feature Binning","level":"3.4","depth":1,"path":"ft_engineering/binning.md","ref":"ft_engineering/binning.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"sp litter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"s elector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/pairing.md","mtime":"2017-07-20T09:43:22.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-09-13T14:07:31.053Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/a98b42f8/userguide/ft_engineering/polynomial.html ---------------------------------------------------------------------- diff --git a/userguide/ft_engineering/polynomial.html b/userguide/ft_engineering/polynomial.html index 3e5e9eb..d0a7d25 100644 --- a/userguide/ft_engineering/polynomial.html +++ b/userguide/ft_engineering/polynomial.html @@ -244,7 +244,7 @@ <b>1.3.1.</b> - Explicit addBias() for better prediction + Explicit add_bias() for better prediction </a> @@ -707,14 +707,14 @@ - <li class="chapter " data-level="4.1" data-path="../eval/stat_eval.html"> + <li class="chapter " data-level="4.1" data-path="../eval/binary_classification_measures.html"> - <a href="../eval/stat_eval.html"> + <a href="../eval/binary_classification_measures.html"> <b>4.1.</b> - Statistical evaluation of a prediction model + Binary Classification Metrics </a> @@ -743,13 +743,43 @@ </li> - <li class="chapter " data-level="4.2" data-path="../eval/rank.html"> + <li class="chapter " data-level="4.2" data-path="../eval/multilabel_classification_measures.html"> - <a href="../eval/rank.html"> + <a href="../eval/multilabel_classification_measures.html"> <b>4.2.</b> + Multi-label Classification Metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.3" data-path="../eval/regression.html"> + + <a href="../eval/regression.html"> + + + <b>4.3.</b> + + Regression metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.4" data-path="../eval/rank.html"> + + <a href="../eval/rank.html"> + + + <b>4.4.</b> + Ranking Measures </a> @@ -758,12 +788,12 @@ </li> - <li class="chapter " data-level="4.3" data-path="../eval/datagen.html"> + <li class="chapter " data-level="4.5" data-path="../eval/datagen.html"> <a href="../eval/datagen.html"> - <b>4.3.</b> + <b>4.5.</b> Data Generation @@ -774,12 +804,12 @@ <ul class="articles"> - <li class="chapter " data-level="4.3.1" data-path="../eval/lr_datagen.html"> + <li class="chapter " data-level="4.5.1" data-path="../eval/lr_datagen.html"> <a href="../eval/lr_datagen.html"> - <b>4.3.1.</b> + <b>4.5.1.</b> Logistic Regression data generation @@ -2258,7 +2288,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Polynomial Features","level":"3.5.1","depth":2,"next":{"title":"FEATURE TRANSFORMATION","level":"3.6","depth":1,"path":"ft_engineering/ft_trans.md","ref":"ft_engineering/ft_trans.md","articles":[{"title":"Feature Vectorization","level":"3.6.1","depth":2,"path":"ft_engineering/vectorization.md","ref":"ft_engineering/vectorization.md","articles":[]},{"title":"Quantify non-number features","level":"3.6.2","depth":2,"path":"ft_engineering/quantify.md","ref":"ft_engineering/quantify.md","articles":[]}]},"previous":{"title":"FEATURE PAIRING","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial Features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-f ilename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook ":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitb ook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/polynomial.md","mtime":"2017-06-23T09:56:22.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-07-14T17:59:22.591Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Polynomial Features","level":"3.5.1","depth":2,"next":{"title":"FEATURE TRANSFORMATION","level":"3.6","depth":1,"path":"ft_engineering/ft_trans.md","ref":"ft_engineering/ft_trans.md","articles":[{"title":"Feature Vectorization","level":"3.6.1","depth":2,"path":"ft_engineering/vectorization.md","ref":"ft_engineering/vectorization.md","articles":[]},{"title":"Quantify non-number features","level":"3.6.2","depth":2,"path":"ft_engineering/quantify.md","ref":"ft_engineering/quantify.md","articles":[]}]},"previous":{"title":"FEATURE PAIRING","level":"3.5","depth":1,"path":"ft_engineering/pairing.md","ref":"ft_engineering/pairing.md","articles":[{"title":"Polynomial Features","level":"3.5.1","depth":2,"path":"ft_engineering/polynomial.md","ref":"ft_engineering/polynomial.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-f ilename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook ":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitb ook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/polynomial.md","mtime":"2017-07-20T11:24:46.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-09-13T14:07:31.053Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/a98b42f8/userguide/ft_engineering/quantify.html ---------------------------------------------------------------------- diff --git a/userguide/ft_engineering/quantify.html b/userguide/ft_engineering/quantify.html index fc1de97..25b004e 100644 --- a/userguide/ft_engineering/quantify.html +++ b/userguide/ft_engineering/quantify.html @@ -244,7 +244,7 @@ <b>1.3.1.</b> - Explicit addBias() for better prediction + Explicit add_bias() for better prediction </a> @@ -707,14 +707,14 @@ - <li class="chapter " data-level="4.1" data-path="../eval/stat_eval.html"> + <li class="chapter " data-level="4.1" data-path="../eval/binary_classification_measures.html"> - <a href="../eval/stat_eval.html"> + <a href="../eval/binary_classification_measures.html"> <b>4.1.</b> - Statistical evaluation of a prediction model + Binary Classification Metrics </a> @@ -743,13 +743,43 @@ </li> - <li class="chapter " data-level="4.2" data-path="../eval/rank.html"> + <li class="chapter " data-level="4.2" data-path="../eval/multilabel_classification_measures.html"> - <a href="../eval/rank.html"> + <a href="../eval/multilabel_classification_measures.html"> <b>4.2.</b> + Multi-label Classification Metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.3" data-path="../eval/regression.html"> + + <a href="../eval/regression.html"> + + + <b>4.3.</b> + + Regression metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.4" data-path="../eval/rank.html"> + + <a href="../eval/rank.html"> + + + <b>4.4.</b> + Ranking Measures </a> @@ -758,12 +788,12 @@ </li> - <li class="chapter " data-level="4.3" data-path="../eval/datagen.html"> + <li class="chapter " data-level="4.5" data-path="../eval/datagen.html"> <a href="../eval/datagen.html"> - <b>4.3.</b> + <b>4.5.</b> Data Generation @@ -774,12 +804,12 @@ <ul class="articles"> - <li class="chapter " data-level="4.3.1" data-path="../eval/lr_datagen.html"> + <li class="chapter " data-level="4.5.1" data-path="../eval/lr_datagen.html"> <a href="../eval/lr_datagen.html"> - <b>4.3.1.</b> + <b>4.5.1.</b> Logistic Regression data generation @@ -2358,7 +2388,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Quantify non-number features","level":"3.6.2","depth":2,"next":{"title":"TF-IDF Calculation","level":"3.7","depth":1,"path":"ft_engineering/tfidf.md","ref":"ft_engineering/tfidf.md","articles":[]},"previous":{"title":"Feature Vectorization","level":"3.6.1","depth":2,"path":"ft_engineering/vectorization.md","ref":"ft_engineering/vectorization.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/inc ubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel ":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/quantify.md","mtime":"2017-05-11T07:09:12.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-07-14T17:59:22.591Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Quantify non-number features","level":"3.6.2","depth":2,"next":{"title":"TF-IDF Calculation","level":"3.7","depth":1,"path":"ft_engineering/tfidf.md","ref":"ft_engineering/tfidf.md","articles":[]},"previous":{"title":"Feature Vectorization","level":"3.6.1","depth":2,"path":"ft_engineering/vectorization.md","ref":"ft_engineering/vectorization.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/inc ubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel ":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/quantify.md","mtime":"2017-07-20T11:24:46.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-09-13T14:07:31.053Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/a98b42f8/userguide/ft_engineering/scaling.html ---------------------------------------------------------------------- diff --git a/userguide/ft_engineering/scaling.html b/userguide/ft_engineering/scaling.html index 0ed8ce2..1cdd9f3 100644 --- a/userguide/ft_engineering/scaling.html +++ b/userguide/ft_engineering/scaling.html @@ -244,7 +244,7 @@ <b>1.3.1.</b> - Explicit addBias() for better prediction + Explicit add_bias() for better prediction </a> @@ -707,14 +707,14 @@ - <li class="chapter " data-level="4.1" data-path="../eval/stat_eval.html"> + <li class="chapter " data-level="4.1" data-path="../eval/binary_classification_measures.html"> - <a href="../eval/stat_eval.html"> + <a href="../eval/binary_classification_measures.html"> <b>4.1.</b> - Statistical evaluation of a prediction model + Binary Classification Metrics </a> @@ -743,13 +743,43 @@ </li> - <li class="chapter " data-level="4.2" data-path="../eval/rank.html"> + <li class="chapter " data-level="4.2" data-path="../eval/multilabel_classification_measures.html"> - <a href="../eval/rank.html"> + <a href="../eval/multilabel_classification_measures.html"> <b>4.2.</b> + Multi-label Classification Metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.3" data-path="../eval/regression.html"> + + <a href="../eval/regression.html"> + + + <b>4.3.</b> + + Regression metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.4" data-path="../eval/rank.html"> + + <a href="../eval/rank.html"> + + + <b>4.4.</b> + Ranking Measures </a> @@ -758,12 +788,12 @@ </li> - <li class="chapter " data-level="4.3" data-path="../eval/datagen.html"> + <li class="chapter " data-level="4.5" data-path="../eval/datagen.html"> <a href="../eval/datagen.html"> - <b>4.3.</b> + <b>4.5.</b> Data Generation @@ -774,12 +804,12 @@ <ul class="articles"> - <li class="chapter " data-level="4.3.1" data-path="../eval/lr_datagen.html"> + <li class="chapter " data-level="4.5.1" data-path="../eval/lr_datagen.html"> <a href="../eval/lr_datagen.html"> - <b>4.3.1.</b> + <b>4.5.1.</b> Logistic Regression data generation @@ -2372,7 +2402,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Feature Scaling","level":"3.1","depth":1,"next":{"title":"Feature Hashing","level":"3.2","depth":1,"path":"ft_engineering/hashing.md","ref":"ft_engineering/hashing.md","articles":[]},"previous":{"title":"Text Tokenizer","level":"2.3","depth":1,"path":"misc/tokenizer.md","ref":"misc/tokenizer.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downl oadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.cal lout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/scaling.md","mtime":"2017-05-11T07:09:12.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-07-14T17:59:22.591Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Feature Scaling","level":"3.1","depth":1,"next":{"title":"Feature Hashing","level":"3.2","depth":1,"path":"ft_engineering/hashing.md","ref":"ft_engineering/hashing.md","articles":[]},"previous":{"title":"Text Tokenizer","level":"2.3","depth":1,"path":"misc/tokenizer.md","ref":"misc/tokenizer.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downl oadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.cal lout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/scaling.md","mtime":"2017-07-20T11:24:46.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-09-13T14:07:31.053Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/a98b42f8/userguide/ft_engineering/selection.html ---------------------------------------------------------------------- diff --git a/userguide/ft_engineering/selection.html b/userguide/ft_engineering/selection.html index b080cbd..0d8e579 100644 --- a/userguide/ft_engineering/selection.html +++ b/userguide/ft_engineering/selection.html @@ -244,7 +244,7 @@ <b>1.3.1.</b> - Explicit addBias() for better prediction + Explicit add_bias() for better prediction </a> @@ -707,14 +707,14 @@ - <li class="chapter " data-level="4.1" data-path="../eval/stat_eval.html"> + <li class="chapter " data-level="4.1" data-path="../eval/binary_classification_measures.html"> - <a href="../eval/stat_eval.html"> + <a href="../eval/binary_classification_measures.html"> <b>4.1.</b> - Statistical evaluation of a prediction model + Binary Classification Metrics </a> @@ -743,13 +743,43 @@ </li> - <li class="chapter " data-level="4.2" data-path="../eval/rank.html"> + <li class="chapter " data-level="4.2" data-path="../eval/multilabel_classification_measures.html"> - <a href="../eval/rank.html"> + <a href="../eval/multilabel_classification_measures.html"> <b>4.2.</b> + Multi-label Classification Metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.3" data-path="../eval/regression.html"> + + <a href="../eval/regression.html"> + + + <b>4.3.</b> + + Regression metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.4" data-path="../eval/rank.html"> + + <a href="../eval/rank.html"> + + + <b>4.4.</b> + Ranking Measures </a> @@ -758,12 +788,12 @@ </li> - <li class="chapter " data-level="4.3" data-path="../eval/datagen.html"> + <li class="chapter " data-level="4.5" data-path="../eval/datagen.html"> <a href="../eval/datagen.html"> - <b>4.3.</b> + <b>4.5.</b> Data Generation @@ -774,12 +804,12 @@ <ul class="articles"> - <li class="chapter " data-level="4.3.1" data-path="../eval/lr_datagen.html"> + <li class="chapter " data-level="4.5.1" data-path="../eval/lr_datagen.html"> <a href="../eval/lr_datagen.html"> - <b>4.3.1.</b> + <b>4.5.1.</b> Logistic Regression data generation @@ -2180,11 +2210,11 @@ <h1 id="supported-feature-selection-algorithms">Supported Feature Selection algorithms</h1> <ul> <li>Chi-square (Chi2)<ul> -<li>In statistics, the <span class="katex"><span class="katex-mathml"><math><semantics><mrow><msup><mi>χ</mi><mn>2</mn></msup></mrow><annotation encoding="application/x-tex">\chi^2</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8141079999999999em;"></span><span class="strut bottom" style="height:1.008548em;vertical-align:-0.19444em;"></span><span class="base textstyle uncramped"><span class="mord"><span class="mord mathit">χ</span><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped"><span class="mord mathrm">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span></span></span> test is applied to test the indep endence of two even events. Chi-square statistics between every feature variable and the target variable can be applied to Feature Selection. Refer <a href="http://nlp.stanford.edu/IR-book/html/htmledition/feature-selectionchi2-feature-selection-1.html" target="_blank">this article</a> for Mathematical details.</li> +<li>In statistics, the <span class="katex"><span class="katex-mathml"><math><semantics><mrow><msup><mi>χ</mi><mn>2</mn></msup></mrow><annotation encoding="application/x-tex">\chi^2</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.8141079999999999em;"></span><span class="strut bottom" style="height:1.008548em;vertical-align:-0.19444em;"></span><span class="base textstyle uncramped"><span class="mord"><span class="mord mathit">χ</span><span class="msupsub"><span class="vlist"><span style="top:-0.363em;margin-right:0.05em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle uncramped mtight"><span class="mord mathrm mtight">2</span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span></span></spa n></span> test is applied to test the independence of two even events. Chi-square statistics between every feature variable and the target variable can be applied to Feature Selection. Refer <a href="http://nlp.stanford.edu/IR-book/html/htmledition/feature-selectionchi2-feature-selection-1.html" target="_blank">this article</a> for Mathematical details.</li> </ul> </li> <li>Signal Noise Ratio (SNR)<ul> -<li>The Signal Noise Ratio (SNR) is a univariate feature ranking metric, which can be used as a feature selection criterion for binary classification problems. SNR is defined as <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi mathvariant="normal">∣</mi><msub><mi>μ</mi><mrow><mn>1</mn></mrow></msub><mo>−</mo><msub><mi>μ</mi><mrow><mn>2</mn></mrow></msub><mi mathvariant="normal">∣</mi><mi mathvariant="normal">/</mi><mo>(</mo><msub><mi>σ</mi><mrow><mn>1</mn></mrow></msub><mo>+</mo><msub><mi>σ</mi><mrow><mn>2</mn></mrow></msub><mo>)</mo></mrow><annotation encoding="application/x-tex">|\mu_{1} - \mu_{2}| / (\sigma_{1} + \sigma_{2})</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">∣</span><s pan class="mord"><span class="mord mathit">μ</span><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped"><span class="mord scriptstyle cramped"><span class="mord mathrm">1</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mbin">−</span><span class="mord"><span class="mord mathit">μ</span><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped"><span class="mord scriptstyle cramped"><span class="mord mathrm">2</span></span></span></span><span class="baseline-fix"><span class= "fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mord mathrm">∣</span><span class="mord mathrm">/</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" style="margin-right:0.03588em;">σ</span><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.03588em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped"><span class="mord scriptstyle cramped"><span class="mord mathrm">1</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mbin">+</span><span class="mord"><span class="mord mathit" style="margin-right:0.03588em;">σ</span><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0. 03588em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped"><span class="mord scriptstyle cramped"><span class="mord mathrm">2</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span><span class="mclose">)</span></span></span></span>, where <span class="katex"><span class="katex-mathml"><math><semantics><mrow><msub><mi>μ</mi><mrow><mi>k</mi></mrow></msub></mrow><annotation encoding="application/x-tex">\mu_{k}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.625em;vertical-align:-0.19444em;"></span><span class="base textstyle uncramped"><span class="mord"><span class="mord mathit">μ</span><span class="vlist"><span style="t op:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped"><span class="mord scriptstyle cramped"><span class="mord mathit" style="margin-right:0.03148em;">k</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span></span></span> is the mean value of the variable in classes <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>k</mi></mrow><annotation encoding="application/x-tex">k</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.69444em;"></span><span class="strut bottom" style="height:0.69444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.03148em;">k</span>< /span></span></span>, and <span class="katex"><span class="katex-mathml"><math><semantics><mrow><msub><mi>σ</mi><mrow><mi>k</mi></mrow></msub></mrow><annotation encoding="application/x-tex">\sigma_{k}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.58056em;vertical-align:-0.15em;"></span><span class="base textstyle uncramped"><span class="mord"><span class="mord mathit" style="margin-right:0.03588em;">σ</span><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.03588em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped"><span class="mord scriptstyle cramped"><span class="mord mathit" style="margin-right:0.03148em;">k</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5">< span style="font-size:0em;">​</span></span>​</span></span></span></span></span></span> is the standard deviations of the variable in classes <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>k</mi></mrow><annotation encoding="application/x-tex">k</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.69444em;"></span><span class="strut bottom" style="height:0.69444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.03148em;">k</span></span></span></span>. Clearly, features with larger SNR are useful for classification.</li> +<li>The Signal Noise Ratio (SNR) is a univariate feature ranking metric, which can be used as a feature selection criterion for binary classification problems. SNR is defined as <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi mathvariant="normal">∣</mi><msub><mi>μ</mi><mrow><mn>1</mn></mrow></msub><mo>−</mo><msub><mi>μ</mi><mrow><mn>2</mn></mrow></msub><mi mathvariant="normal">∣</mi><mi mathvariant="normal">/</mi><mo>(</mo><msub><mi>σ</mi><mrow><mn>1</mn></mrow></msub><mo>+</mo><msub><mi>σ</mi><mrow><mn>2</mn></mrow></msub><mo>)</mo></mrow><annotation encoding="application/x-tex">|\mu_{1} - \mu_{2}| / (\sigma_{1} + \sigma_{2})</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.75em;"></span><span class="strut bottom" style="height:1em;vertical-align:-0.25em;"></span><span class="base textstyle uncramped"><span class="mord mathrm">∣</span><s pan class="mord"><span class="mord mathit">μ</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mbin">−</span><span class="mord"><span class="mord mathit">μ</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span cl ass="mord mathrm mtight">2</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mord mathrm">∣</span><span class="mord mathrm">/</span><span class="mopen">(</span><span class="mord"><span class="mord mathit" style="margin-right:0.03588em;">σ</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.03588em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathrm mtight">1</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mbin">+</span><span class="mord">< span class="mord mathit" style="margin-right:0.03588em;">σ</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.03588em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathrm mtight">2</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span><span class="mclose">)</span></span></span></span>, where <span class="katex"><span class="katex-mathml"><math><semantics><mrow><msub><mi>μ</mi><mrow><mi>k</mi></mrow></msub></mrow><annotation encoding="application/x-tex">\mu_{k}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="str ut bottom" style="height:0.625em;vertical-align:-0.19444em;"></span><span class="base textstyle uncramped"><span class="mord"><span class="mord mathit">μ</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:0em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.03148em;">k</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span></span></span></span> is the mean value of the variable in classes <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>k</mi></mrow><annotation encoding="application/x-tex">k</annotation></semantics></math></span><span class="katex-html" aria-hi dden="true"><span class="strut" style="height:0.69444em;"></span><span class="strut bottom" style="height:0.69444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.03148em;">k</span></span></span></span>, and <span class="katex"><span class="katex-mathml"><math><semantics><mrow><msub><mi>σ</mi><mrow><mi>k</mi></mrow></msub></mrow><annotation encoding="application/x-tex">\sigma_{k}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.43056em;"></span><span class="strut bottom" style="height:0.58056em;vertical-align:-0.15em;"></span><span class="base textstyle uncramped"><span class="mord"><span class="mord mathit" style="margin-right:0.03588em;">σ</span><span class="msupsub"><span class="vlist"><span style="top:0.15em;margin-right:0.05em;margin-left:-0.03588em;"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">&#x 200B;</span></span><span class="reset-textstyle scriptstyle cramped mtight"><span class="mord scriptstyle cramped mtight"><span class="mord mathit mtight" style="margin-right:0.03148em;">k</span></span></span></span><span class="baseline-fix"><span class="fontsize-ensurer reset-size5 size5"><span style="font-size:0em;">​</span></span>​</span></span></span></span></span></span></span> is the standard deviations of the variable in classes <span class="katex"><span class="katex-mathml"><math><semantics><mrow><mi>k</mi></mrow><annotation encoding="application/x-tex">k</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="strut" style="height:0.69444em;"></span><span class="strut bottom" style="height:0.69444em;vertical-align:0em;"></span><span class="base textstyle uncramped"><span class="mord mathit" style="margin-right:0.03148em;">k</span></span></span></span>. Clearly, features with larger SNR are useful for classification.</li> </ul> </li> </ul> @@ -2417,7 +2447,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Feature Selection","level":"3.3","depth":1,"next":{"title":"Feature Binning","level":"3.4","depth":1,"path":"ft_engineering/binning.md","ref":"ft_engineering/binning.md","articles":[]},"previous":{"title":"Feature Hashing","level":"3.2","depth":1,"path":"ft_engineering/hashing.md","ref":"ft_engineering/hashing.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{} ,"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":" h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/selection.md","mtime":"2017-05-11T07:09:12.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-07-14T17:59:22.591Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Feature Selection","level":"3.3","depth":1,"next":{"title":"Feature Binning","level":"3.4","depth":1,"path":"ft_engineering/binning.md","ref":"ft_engineering/binning.md","articles":[]},"previous":{"title":"Feature Hashing","level":"3.2","depth":1,"path":"ft_engineering/hashing.md","ref":"ft_engineering/hashing.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{} ,"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":" h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/selection.md","mtime":"2017-07-20T09:43:22.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-09-13T14:07:31.053Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/a98b42f8/userguide/ft_engineering/tfidf.html ---------------------------------------------------------------------- diff --git a/userguide/ft_engineering/tfidf.html b/userguide/ft_engineering/tfidf.html index fe734e1..d71b2c7 100644 --- a/userguide/ft_engineering/tfidf.html +++ b/userguide/ft_engineering/tfidf.html @@ -97,7 +97,7 @@ <link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon"> - <link rel="next" href="../eval/stat_eval.html" /> + <link rel="next" href="../eval/binary_classification_measures.html" /> <link rel="prev" href="quantify.html" /> @@ -244,7 +244,7 @@ <b>1.3.1.</b> - Explicit addBias() for better prediction + Explicit add_bias() for better prediction </a> @@ -707,14 +707,14 @@ - <li class="chapter " data-level="4.1" data-path="../eval/stat_eval.html"> + <li class="chapter " data-level="4.1" data-path="../eval/binary_classification_measures.html"> - <a href="../eval/stat_eval.html"> + <a href="../eval/binary_classification_measures.html"> <b>4.1.</b> - Statistical evaluation of a prediction model + Binary Classification Metrics </a> @@ -743,13 +743,43 @@ </li> - <li class="chapter " data-level="4.2" data-path="../eval/rank.html"> + <li class="chapter " data-level="4.2" data-path="../eval/multilabel_classification_measures.html"> - <a href="../eval/rank.html"> + <a href="../eval/multilabel_classification_measures.html"> <b>4.2.</b> + Multi-label Classification Metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.3" data-path="../eval/regression.html"> + + <a href="../eval/regression.html"> + + + <b>4.3.</b> + + Regression metrics + + </a> + + + + </li> + + <li class="chapter " data-level="4.4" data-path="../eval/rank.html"> + + <a href="../eval/rank.html"> + + + <b>4.4.</b> + Ranking Measures </a> @@ -758,12 +788,12 @@ </li> - <li class="chapter " data-level="4.3" data-path="../eval/datagen.html"> + <li class="chapter " data-level="4.5" data-path="../eval/datagen.html"> <a href="../eval/datagen.html"> - <b>4.3.</b> + <b>4.5.</b> Data Generation @@ -774,12 +804,12 @@ <ul class="articles"> - <li class="chapter " data-level="4.3.1" data-path="../eval/lr_datagen.html"> + <li class="chapter " data-level="4.5.1" data-path="../eval/lr_datagen.html"> <a href="../eval/lr_datagen.html"> - <b>4.3.1.</b> + <b>4.5.1.</b> Logistic Regression data generation @@ -2157,7 +2187,18 @@ --> <p>This document explains how to compute <a href="http://en.wikipedia.org/wiki/Tf%E2%80%93idf" target="_blank">TF-IDF</a> with Apache Hive/Hivemall.</p> <p>What you need to compute TF-IDF is a table/view composing (docid, word) pair, 2 views, and 1 query.</p> -<p><em>Note that this feature is supported since Hivemall v0.3-beta3 or later. Macro is supported since Hive 0.12 or later.</em></p> +<!-- toc --><div id="toc" class="toc"> + +<ul> +<li><a href="#define-macros-used-in-the-tf-idf-computation">Define macros used in the TF-IDF computation</a></li> +<li><a href="#data-preparation">Data preparation</a></li> +<li><a href="#define-views-of-tfdf">Define views of TF/DF</a></li> +<li><a href="#tf-idf-calculation-for-each-docidword-pair">TF-IDF calculation for each docid/word pair</a></li> +<li><a href="#feature-vector-with-tf-idf-values">Feature Vector with TF-IDF values</a></li> +</ul> + +</div><!-- tocstop --> +<div class="panel panel-primary"><div class="panel-heading"><h3 class="panel-title" id="note"><i class="fa fa-edit"></i> Note</h3></div><div class="panel-body"><p>This feature is supported since Hivemall v0.3-beta3 or later. Macro is supported since Hive 0.12 or later.</p></div></div> <h1 id="define-macros-used-in-the-tf-idf-computation">Define macros used in the TF-IDF computation</h1> <pre><code class="lang-sql"><span class="hljs-keyword">create</span> <span class="hljs-keyword">temporary</span> macro max2(x <span class="hljs-built_in">INT</span>, y <span class="hljs-built_in">INT</span>) <span class="hljs-keyword">if</span>(x>y,x,y); @@ -2340,7 +2381,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"TF-IDF Calculation","level":"3.7","depth":1,"next":{"title":"Statistical evaluation of a prediction model","level":"4.1","depth":1,"path":"eval/stat_eval.md","ref":"eval/stat_eval.md","articles":[{"title":"Area Under the ROC Curve","level":"4.1.1","depth":2,"path":"eval/auc.md","ref":"eval/auc.md","articles":[]}]},"previous":{"title":"Quantify non-number features","level":"3.6.2","depth":2,"path":"ft_engineering/quantify.md","ref":"ft_engineering/quantify.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{ },"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css" ,"epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/tfidf.md","mtime":"2016-12-02T08:02:42.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-07-14T17:59:22.591Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"TF-IDF Calculation","level":"3.7","depth":1,"next":{"title":"Binary Classification Metrics","level":"4.1","depth":1,"path":"eval/binary_classification_measures.md","ref":"eval/binary_classification_measures.md","articles":[{"title":"Area Under the ROC Curve","level":"4.1.1","depth":2,"path":"eval/auc.md","ref":"eval/auc.md","articles":[]}]},"previous":{"title":"Quantify non-number features","level":"3.6.2","depth":2,"path":"ft_engineering/quantify.md","ref":"ft_engineering/quantify.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{ "emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website. css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"ft_engineering/tfidf.md","mtime":"2017-07-20T11:24:46.000Z","type":"markdown"},"gitbook":{"version":"3.2.2","time":"2017-09-13T14:07:31.053Z"},"basePath":"..","book":{"language":""}}); }); </script> </div>
