http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/e1e876db/userguide/misc/funcs.html ---------------------------------------------------------------------- diff --git a/userguide/misc/funcs.html b/userguide/misc/funcs.html index c43faed..f59575c 100644 --- a/userguide/misc/funcs.html +++ b/userguide/misc/funcs.html @@ -2604,7 +2604,80 @@ Google Maps: https://www.google.com/maps/@${lat},${lon},${zoom}z </ul> <h1 id="sketching">Sketching</h1> <ul> -<li><code>approx_count_distinct(expr x [, const string options])</code> - Returns an approximation of count(DISTINCT x) using HyperLogLogPlus algorithm</li> +<li><p><code>approx_count_distinct(expr x [, const string options])</code> - Returns an approximation of count(DISTINCT x) using HyperLogLogPlus algorithm</p> +</li> +<li><p><code>bloom(string key)</code> - Constructs a BloomFilter by aggregating a set of keys</p> +<pre><code class="lang-sql"><span class="hljs-keyword">CREATE</span> <span class="hljs-keyword">TABLE</span> satisfied_movies <span class="hljs-keyword">AS</span> + <span class="hljs-keyword">SELECT</span> bloom(movieid) <span class="hljs-keyword">as</span> movies + <span class="hljs-keyword">FROM</span> ( + <span class="hljs-keyword">SELECT</span> movieid + <span class="hljs-keyword">FROM</span> ratings + <span class="hljs-keyword">GROUP</span> <span class="hljs-keyword">BY</span> movieid + <span class="hljs-keyword">HAVING</span> <span class="hljs-keyword">avg</span>(rating) >= <span class="hljs-number">4.0</span> + ) t; +</code></pre> +</li> +<li><p><code>bloom_and(string bloom1, string bloom2)</code> - Returns the logical AND of two bloom filters</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> bloom_and(bf1, bf2) <span class="hljs-keyword">FROM</span> xxx; +</code></pre> +</li> +<li><p><code>bloom_contains(string bloom, string key)</code> or <em>FUNC</em>(string bloom, array<string> keys) - Returns true if the bloom filter contains all the given key(s). 
Returns false if key is null.</p> +<pre><code class="lang-sql">WITH satisfied_movies as ( + <span class="hljs-keyword">SELECT</span> bloom(movieid) <span class="hljs-keyword">as</span> movies + <span class="hljs-keyword">FROM</span> ( + <span class="hljs-keyword">SELECT</span> movieid + <span class="hljs-keyword">FROM</span> ratings + <span class="hljs-keyword">GROUP</span> <span class="hljs-keyword">BY</span> movieid + <span class="hljs-keyword">HAVING</span> <span class="hljs-keyword">avg</span>(rating) >= <span class="hljs-number">4.0</span> + ) t +) +<span class="hljs-keyword">SELECT</span> + l.rating, + <span class="hljs-keyword">count</span>(<span class="hljs-keyword">distinct</span> l.userid) <span class="hljs-keyword">as</span> cnt +<span class="hljs-keyword">FROM</span> + ratings l + <span class="hljs-keyword">CROSS</span> <span class="hljs-keyword">JOIN</span> satisfied_movies r +<span class="hljs-keyword">WHERE</span> + bloom_contains(r.movies, l.movieid) <span class="hljs-comment">-- includes false positive</span> +<span class="hljs-keyword">GROUP</span> <span class="hljs-keyword">BY</span> + l.rating; + +l.rating cnt +1 1296 +2 2770 +3 5008 +4 5824 +5 5925 +</code></pre> +</li> +<li><p><code>bloom_contains_any(string bloom, string key)</code> or <em>FUNC</em>(string bloom, array<string> keys)- Returns true if the bloom filter contains any of the given key</p> +<pre><code class="lang-sql">WITH data1 as ( + <span class="hljs-keyword">SELECT</span> explode(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>,<span class="hljs-number">4</span>,<span class="hljs-number">5</span>)) <span class="hljs-keyword">as</span> <span class="hljs-keyword">id</span> +), +data2 <span class="hljs-keyword">as</span> ( + <span class="hljs-keyword">SELECT</span> explode(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span 
class="hljs-number">3</span>,<span class="hljs-number">5</span>,<span class="hljs-number">6</span>,<span class="hljs-number">8</span>)) <span class="hljs-keyword">as</span> <span class="hljs-keyword">id</span> +), +bloom <span class="hljs-keyword">as</span> ( + <span class="hljs-keyword">SELECT</span> bloom(<span class="hljs-keyword">id</span>) <span class="hljs-keyword">as</span> bf + <span class="hljs-keyword">FROM</span> data1 +) +<span class="hljs-keyword">SELECT</span> + l.* +<span class="hljs-keyword">FROM</span> + data2 l + <span class="hljs-keyword">CROSS</span> <span class="hljs-keyword">JOIN</span> bloom r +<span class="hljs-keyword">WHERE</span> + bloom_contains_any(r.bf, <span class="hljs-built_in">array</span>(l.<span class="hljs-keyword">id</span>)) +</code></pre> +</li> +<li><p><code>bloom_not(string bloom)</code> - Returns the logical NOT of a bloom filter</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> bloom_not(bf) <span class="hljs-keyword">FROM</span> xxx; +</code></pre> +</li> +<li><p><code>bloom_or(string bloom1, string bloom2)</code> - Returns the logical OR of two bloom filters</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> bloom_or(bf1, bf2) <span class="hljs-keyword">FROM</span> xxx; +</code></pre> +</li> </ul> <h1 id="ensemble-learning">Ensemble learning</h1> <ul> @@ -2659,7 +2732,7 @@ Google Maps: https://www.google.com/maps/@${lat},${lon},${zoom}z <h1 id="others">Others</h1> <ul> <li><p><code>hivemall_version()</code> - Returns the version of Hivemall</p> -<pre><code class="lang-sql">Usage: <span class="hljs-keyword">SELECT</span> hivemall_version(); +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> hivemall_version(); </code></pre> </li> <li><p><code>lr_datagen(options string)</code> - Generates a logistic regression dataset</p> @@ -2724,7 +2797,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || 
[]; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"tips/rand_amplify.md","articles":[]},{"title":"Real-time prediction on RDBMS","level":"1.4.3","depth":2,"path":"tips/rt_prediction.md","ref":"tips/rt_prediction.md","articles":[]},{"title":"Ensemble learning for stable prediction","level":"1.4.4","depth":2,"path":"tips/ensemble_learning.md","ref":"tips/ensemble_learning.md","articles":[]},{"title":"Mixing models for a better prediction convergence (MIX server)","level":"1.4.5","depth":2,"path":"tips/mixserver.md","ref":"tips/mixserver.md","articles":[ ]},{"title":"Run Hivemall on Amazon Elastic MapReduce","level":"1.4.6","depth":2,"path":"tips/emr.md","ref":"tips/emr.md","articles":[]}]},"previous":{"title":"Input 
Format","level":"1.2.3","depth":2,"path":"getting_started/input-format.md","ref":"getting_started/input-format.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/git book","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters" 
:{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"misc/funcs.md","mtime":"2018-04-25T08:11:03.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-04-26T03:55:31.199Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"tips/rand_amplify.md","articles":[]},{"title":"Real-time prediction on RDBMS","level":"1.4.3","depth":2,"path":"tips/rt_prediction.md","ref":"tips/rt_prediction.md","articles":[]},{"title":"Ensemble learning for stable prediction","level":"1.4.4","depth":2,"path":"tips/ensemble_learning.md","ref":"tips/ensemble_learning.md","articles":[]},{"title":"Mixing models for a better prediction convergence (MIX server)","level":"1.4.5","depth":2,"path":"tips/mixserver.md","ref":"tips/mixserver.md","articles":[ ]},{"title":"Run Hivemall on Amazon Elastic MapReduce","level":"1.4.6","depth":2,"path":"tips/emr.md","ref":"tips/emr.md","articles":[]}]},"previous":{"title":"Input 
Format","level":"1.2.3","depth":2,"path":"getting_started/input-format.md","ref":"getting_started/input-format.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/git book","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters" 
:{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"misc/funcs.md","mtime":"2018-06-06T08:56:31.022Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-06-06T09:01:20.330Z"},"basePath":"..","book":{"language":""}}); }); </script> </div>
http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/e1e876db/userguide/misc/generic_funcs.html ---------------------------------------------------------------------- diff --git a/userguide/misc/generic_funcs.html b/userguide/misc/generic_funcs.html index 694d3b6..d7972f6 100644 --- a/userguide/misc/generic_funcs.html +++ b/userguide/misc/generic_funcs.html @@ -2220,13 +2220,18 @@ <ul> <li><a href="#array">Array</a></li> -<li><a href="#map">Map</a></li> <li><a href="#bitset">Bitset</a></li> <li><a href="#compression">Compression</a></li> +<li><a href="#datetime">Datetime</a></li> +<li><a href="#json">JSON</a></li> +<li><a href="#map">Map</a></li> <li><a href="#mapreduce">MapReduce</a></li> <li><a href="#math">Math</a></li> <li><a href="#matrix">Matrix</a></li> +<li><a href="#sanity-checks">Sanity Checks</a></li> <li><a href="#text-processing">Text processing</a></li> +<li><a href="#timeseries">Timeseries</a></li> +<li><a href="#vector">Vector</a></li> <li><a href="#others">Others</a></li> </ul> @@ -2234,80 +2239,167 @@ <h1 id="array">Array</h1> <ul> <li><p><code>array_append(array<T> arr, T elem)</code> - Append an element to the end of an array</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> array_append(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>),<span class="hljs-number">3</span>); + 1,2,3 + +<span class="hljs-keyword">SELECT</span> array_append(<span class="hljs-built_in">array</span>(<span class="hljs-string">'a'</span>,<span class="hljs-string">'b'</span>),<span class="hljs-string">'c'</span>); + "a","b","c" +</code></pre> </li> <li><p><code>array_avg(array<number>)</code> - Returns an array<double> in which each element is the mean of a set of numbers</p> </li> <li><p><code>array_concat(array<ANY> x1, array<ANY> x2, ..)</code> - Returns a concatenated array</p> -<pre><code class="lang-sql">select array_concat(array(1),array(2,3)); -> [1,2,3] 
+<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> array_concat(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>),<span class="hljs-built_in">array</span>(<span class="hljs-number">2</span>,<span class="hljs-number">3</span>)); + [1,2,3] </code></pre> </li> <li><p><code>array_flatten(array<array<ANY>>)</code> - Returns an array with the elements flattened.</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> array_flatten(<span class="hljs-built_in">array</span>(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>),<span class="hljs-built_in">array</span>(<span class="hljs-number">4</span>,<span class="hljs-number">5</span>),<span class="hljs-built_in">array</span>(<span class="hljs-number">6</span>,<span class="hljs-number">7</span>,<span class="hljs-number">8</span>))); + [1,2,3,4,5,6,7,8] +</code></pre> </li> <li><p><code>array_intersect(array<ANY> x1, array<ANY> x2, ..)</code> - Returns an intersect of given arrays</p> -<pre><code class="lang-sql">select array_intersect(array(1,3,4),array(2,3,4),array(3,5)); -> [3] +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> array_intersect(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">3</span>,<span class="hljs-number">4</span>),<span class="hljs-built_in">array</span>(<span class="hljs-number">2</span>,<span class="hljs-number">3</span>,<span class="hljs-number">4</span>),<span class="hljs-built_in">array</span>(<span class="hljs-number">3</span>,<span class="hljs-number">5</span>)); + [3] </code></pre> </li> <li><p><code>array_remove(array<int|text> original, int|text|array<int> target)</code> - Returns an array that the target is removed from the original array</p> -<pre><code class="lang-sql">select array_remove(array(1,null,3),array(null)); -> [3] +<pre><code class="lang-sql"><span 
class="hljs-keyword">SELECT</span> array_remove(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-literal">null</span>,<span class="hljs-number">3</span>),<span class="hljs-built_in">array</span>(<span class="hljs-literal">null</span>)); + [3] -select array_remove(array("aaa","bbb"),"bbb"); -> ["aaa"] +<span class="hljs-keyword">SELECT</span> array_remove(<span class="hljs-built_in">array</span>(<span class="hljs-string">"aaa"</span>,<span class="hljs-string">"bbb"</span>),<span class="hljs-string">"bbb"</span>); + ["aaa"] </code></pre> </li> <li><p><code>array_slice(array<ANY> values, int offset [, int length])</code> - Slices the given array by the given offset and length parameters.</p> -<pre><code class="lang-sql">select array_slice(array(1,2,3,4,5,6), 2,4); -> [3,4] +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> + array_slice(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>,<span class="hljs-number">4</span>,<span class="hljs-number">5</span>,<span class="hljs-number">6</span>), <span class="hljs-number">2</span>,<span class="hljs-number">4</span>), + array_slice( + <span class="hljs-built_in">array</span>(<span class="hljs-string">"zero"</span>, <span class="hljs-string">"one"</span>, <span class="hljs-string">"two"</span>, <span class="hljs-string">"three"</span>, <span class="hljs-string">"four"</span>, <span class="hljs-string">"five"</span>, <span class="hljs-string">"six"</span>, <span class="hljs-string">"seven"</span>, <span class="hljs-string">"eight"</span>, <span class="hljs-string">"nine"</span>, <span class="hljs-string">"ten"</span>), + <span class="hljs-number">0</span>, <span class="hljs-comment">-- offset</span> + <span class="hljs-number">2</span> <span class="hljs-comment">-- length</span> + ), + array_slice( + <span class="hljs-built_in">array</span>(<span 
class="hljs-string">"zero"</span>, <span class="hljs-string">"one"</span>, <span class="hljs-string">"two"</span>, <span class="hljs-string">"three"</span>, <span class="hljs-string">"four"</span>, <span class="hljs-string">"five"</span>, <span class="hljs-string">"six"</span>, <span class="hljs-string">"seven"</span>, <span class="hljs-string">"eight"</span>, <span class="hljs-string">"nine"</span>, <span class="hljs-string">"ten"</span>), + <span class="hljs-number">6</span>, <span class="hljs-comment">-- offset</span> + <span class="hljs-number">3</span> <span class="hljs-comment">-- length</span> + ), + array_slice( + <span class="hljs-built_in">array</span>(<span class="hljs-string">"zero"</span>, <span class="hljs-string">"one"</span>, <span class="hljs-string">"two"</span>, <span class="hljs-string">"three"</span>, <span class="hljs-string">"four"</span>, <span class="hljs-string">"five"</span>, <span class="hljs-string">"six"</span>, <span class="hljs-string">"seven"</span>, <span class="hljs-string">"eight"</span>, <span class="hljs-string">"nine"</span>, <span class="hljs-string">"ten"</span>), + <span class="hljs-number">6</span>, <span class="hljs-comment">-- offset</span> + <span class="hljs-number">10</span> <span class="hljs-comment">-- length</span> + ), + array_slice( + <span class="hljs-built_in">array</span>(<span class="hljs-string">"zero"</span>, <span class="hljs-string">"one"</span>, <span class="hljs-string">"two"</span>, <span class="hljs-string">"three"</span>, <span class="hljs-string">"four"</span>, <span class="hljs-string">"five"</span>, <span class="hljs-string">"six"</span>, <span class="hljs-string">"seven"</span>, <span class="hljs-string">"eight"</span>, <span class="hljs-string">"nine"</span>, <span class="hljs-string">"ten"</span>), + <span class="hljs-number">6</span> <span class="hljs-comment">-- offset</span> + ), + array_slice( + <span class="hljs-built_in">array</span>(<span class="hljs-string">"zero"</span>, <span 
class="hljs-string">"one"</span>, <span class="hljs-string">"two"</span>, <span class="hljs-string">"three"</span>, <span class="hljs-string">"four"</span>, <span class="hljs-string">"five"</span>, <span class="hljs-string">"six"</span>, <span class="hljs-string">"seven"</span>, <span class="hljs-string">"eight"</span>, <span class="hljs-string">"nine"</span>, <span class="hljs-string">"ten"</span>), + <span class="hljs-number">-3</span> <span class="hljs-comment">-- offset</span> + ), + array_slice( + <span class="hljs-built_in">array</span>(<span class="hljs-string">"zero"</span>, <span class="hljs-string">"one"</span>, <span class="hljs-string">"two"</span>, <span class="hljs-string">"three"</span>, <span class="hljs-string">"four"</span>, <span class="hljs-string">"five"</span>, <span class="hljs-string">"six"</span>, <span class="hljs-string">"seven"</span>, <span class="hljs-string">"eight"</span>, <span class="hljs-string">"nine"</span>, <span class="hljs-string">"ten"</span>), + <span class="hljs-number">-3</span>, <span class="hljs-comment">-- offset</span> + <span class="hljs-number">2</span> <span class="hljs-comment">-- length</span> + ); + + [3,4] + ["zero","one"] + ["six","seven","eight"] + ["six","seven","eight","nine","ten"] + ["six","seven","eight","nine","ten"] + ["eight","nine","ten"] + ["eight","nine"] </code></pre> </li> <li><p><code>array_sum(array<number>)</code> - Returns an array<double> in which each element is summed up</p> </li> +<li><p><code>array_to_str(array arr [, string sep=','])</code> - Convert array to string using a separator</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> array_to_str(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>),<span class="hljs-string">'-'</span>); +1-2-3 +</code></pre> +</li> <li><p><code>array_union(array1, array2, ...)</code> - Returns the union of a set of arrays</p> +<pre><code 
class="lang-sql"><span class="hljs-keyword">SELECT</span> array_union(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>),<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>)); +[1,2] + +<span class="hljs-keyword">SELECT</span> array_union(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>),<span class="hljs-built_in">array</span>(<span class="hljs-number">2</span>,<span class="hljs-number">3</span>),<span class="hljs-built_in">array</span>(<span class="hljs-number">2</span>,<span class="hljs-number">5</span>)); +[1,2,3,5] +</code></pre> </li> <li><p><code>conditional_emit(array<boolean> conditions, array<primitive> features)</code> - Emit features of a row according to various conditions</p> +<pre><code class="lang-sql">WITH input as ( + <span class="hljs-keyword">select</span> <span class="hljs-built_in">array</span>(<span class="hljs-literal">true</span>, <span class="hljs-literal">false</span>, <span class="hljs-literal">true</span>) <span class="hljs-keyword">as</span> conditions, <span class="hljs-built_in">array</span>(<span class="hljs-string">"one"</span>, <span class="hljs-string">"two"</span>, <span class="hljs-string">"three"</span>) <span class="hljs-keyword">as</span> features + <span class="hljs-keyword">UNION</span> ALL + <span class="hljs-keyword">select</span> <span class="hljs-built_in">array</span>(<span class="hljs-literal">true</span>, <span class="hljs-literal">true</span>, <span class="hljs-literal">false</span>), <span class="hljs-built_in">array</span>(<span class="hljs-string">"four"</span>, <span class="hljs-string">"five"</span>, <span class="hljs-string">"six"</span>) +) +<span class="hljs-keyword">SELECT</span> + conditional_emit( + conditions, features + ) +<span class="hljs-keyword">FROM</span> + <span class="hljs-keyword">input</span>; + one + three + four 
+ five +</code></pre> </li> <li><p><code>element_at(array<T> list, int pos)</code> - Returns an element at the given position</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> element_at(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>,<span class="hljs-number">4</span>),<span class="hljs-number">0</span>); + 1 + +<span class="hljs-keyword">SELECT</span> element_at(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>,<span class="hljs-number">4</span>),<span class="hljs-number">-2</span>); + 3 +</code></pre> </li> -<li><p><code>first_element(x)</code> - Returns the first element in an array </p> +<li><p><code>first_element(x)</code> - Returns the first element in an array</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> first_element(<span class="hljs-built_in">array</span>(<span class="hljs-string">'a'</span>,<span class="hljs-string">'b'</span>,<span class="hljs-string">'c'</span>)); + a + +<span class="hljs-keyword">SELECT</span> first_element(<span class="hljs-built_in">array</span>()); + NULL +</code></pre> </li> <li><p><code>float_array(nDims)</code> - Returns an array<float> of nDims elements</p> </li> <li><p><code>last_element(x)</code> - Return the last element in an array</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> last_element(<span class="hljs-built_in">array</span>(<span class="hljs-string">'a'</span>,<span class="hljs-string">'b'</span>,<span class="hljs-string">'c'</span>)); + c +</code></pre> </li> <li><p><code>select_k_best(array<number> array, const array<number> importance, const int k)</code> - Returns selected top-k elements as array<double></p> </li> <li><p><code>sort_and_uniq_array(array<int>)</code> - Takes array<int> and returns a sorted array with duplicate elements 
eliminated</p> -<pre><code class="lang-sql">select sort_and_uniq_array(array(3,1,1,-2,10)); -> [-2,1,3,10] +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> sort_and_uniq_array(<span class="hljs-built_in">array</span>(<span class="hljs-number">3</span>,<span class="hljs-number">1</span>,<span class="hljs-number">1</span>,<span class="hljs-number">-2</span>,<span class="hljs-number">10</span>)); + [-2,1,3,10] </code></pre> </li> <li><p><code>subarray_endwith(array<int|text> original, int|text key)</code> - Returns an array that ends with the specified key</p> -<pre><code class="lang-sql">select subarray_endwith(array(1,2,3,4), 3); -> [1,2,3] +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> subarray_endwith(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>,<span class="hljs-number">4</span>), <span class="hljs-number">3</span>); + [1,2,3] </code></pre> </li> <li><p><code>subarray_startwith(array<int|text> original, int|text key)</code> - Returns an array that starts with the specified key</p> -<pre><code class="lang-sql">select subarray_startwith(array(1,2,3,4), 2); -> [2,3,4] +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> subarray_startwith(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>,<span class="hljs-number">4</span>), <span class="hljs-number">2</span>); + [2,3,4] </code></pre> </li> <li><p><code>to_string_array(array<ANY>)</code> - Returns an array of strings</p> </li> <li><p><code>to_ordered_list(PRIMITIVE value [, PRIMITIVE key, const string options])</code> - Return list of values sorted by value itself or specific key</p> -<pre><code class="lang-sql">with t as ( - <span class="hljs-keyword">select</span> <span class="hljs-number">5</span> <span class="hljs-keyword">as</span> <span 
class="hljs-keyword">key</span>, <span class="hljs-string">'apple'</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> - <span class="hljs-keyword">union</span> all - <span class="hljs-keyword">select</span> <span class="hljs-number">3</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span>, <span class="hljs-string">'banana'</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> - <span class="hljs-keyword">union</span> all - <span class="hljs-keyword">select</span> <span class="hljs-number">4</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span>, <span class="hljs-string">'candy'</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> - <span class="hljs-keyword">union</span> all - <span class="hljs-keyword">select</span> <span class="hljs-number">2</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span>, <span class="hljs-string">'donut'</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> - <span class="hljs-keyword">union</span> all - <span class="hljs-keyword">select</span> <span class="hljs-number">3</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span>, <span class="hljs-string">'egg'</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> +<pre><code class="lang-sql">WITH t as ( + <span class="hljs-keyword">SELECT</span> <span class="hljs-number">5</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span>, <span class="hljs-string">'apple'</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> + <span class="hljs-keyword">UNION</span> ALL + <span class="hljs-keyword">SELECT</span> <span class="hljs-number">3</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span>, <span class="hljs-string">'banana'</span> <span 
class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> + <span class="hljs-keyword">UNION</span> ALL + <span class="hljs-keyword">SELECT</span> <span class="hljs-number">4</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span>, <span class="hljs-string">'candy'</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> + <span class="hljs-keyword">UNION</span> ALL + <span class="hljs-keyword">SELECT</span> <span class="hljs-number">2</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span>, <span class="hljs-string">'donut'</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> + <span class="hljs-keyword">UNION</span> ALL + <span class="hljs-keyword">SELECT</span> <span class="hljs-number">3</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span>, <span class="hljs-string">'egg'</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">value</span> ) -<span class="hljs-keyword">select</span> <span class="hljs-comment">-- expected output</span> +<span class="hljs-keyword">SELECT</span> <span class="hljs-comment">-- expected output</span> to_ordered_list(<span class="hljs-keyword">value</span>, <span class="hljs-keyword">key</span>, <span class="hljs-string">'-reverse'</span>), <span class="hljs-comment">-- [apple, candy, (banana, egg | egg, banana), donut] (reverse order)</span> to_ordered_list(<span class="hljs-keyword">value</span>, <span class="hljs-keyword">key</span>, <span class="hljs-string">'-k 2'</span>), <span class="hljs-comment">-- [apple, candy] (top-k)</span> to_ordered_list(<span class="hljs-keyword">value</span>, <span class="hljs-keyword">key</span>, <span class="hljs-string">'-k 100'</span>), <span class="hljs-comment">-- [apple, candy, (banana, egg | egg, banana), donut]</span> @@ -2319,17 +2411,215 @@ select array_remove(array("aaa","bbb"),"bbb"); to_ordered_list(<span 
class="hljs-keyword">value</span>, <span class="hljs-string">'-k 2'</span>), <span class="hljs-comment">-- [egg, donut] (alphabetically)</span> to_ordered_list(<span class="hljs-keyword">key</span>, <span class="hljs-string">'-k -2 -reverse'</span>), <span class="hljs-comment">-- [5, 4] (top-2 keys)</span> to_ordered_list(<span class="hljs-keyword">key</span>) <span class="hljs-comment">-- [2, 3, 3, 4, 5] (natural ordered keys)</span> -<span class="hljs-keyword">from</span> +<span class="hljs-keyword">FROM</span> t </code></pre> </li> </ul> +<h1 id="bitset">Bitset</h1> +<ul> +<li><p><code>bits_collect(int|long x)</code> - Returns a bitset in array<long></p> +</li> +<li><p><code>bits_or(array<long> b1, array<long> b2, ..)</code> - Returns a logical OR given bitsets</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> unbits(bits_or(to_bits(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">4</span>)),to_bits(<span class="hljs-built_in">array</span>(<span class="hljs-number">2</span>,<span class="hljs-number">3</span>)))); + [1,2,3,4] +</code></pre> +</li> +<li><p><code>to_bits(int[] indexes)</code> - Returns an bitset representation if the given indexes in long[]</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> to_bits(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>,<span class="hljs-number">128</span>)); + [14,-9223372036854775808] +</code></pre> +</li> +<li><p><code>unbits(long[] bitset)</code> - Returns an long array of the give bitset representation</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> unbits(to_bits(<span class="hljs-built_in">array</span>(<span class="hljs-number">1</span>,<span class="hljs-number">4</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>))); + [1,2,3,4] +</code></pre> +</li> +</ul> +<h1 
id="compression">Compression</h1> +<ul> +<li><p><code>deflate(TEXT data [, const int compressionLevel])</code> - Returns a compressed BINARY object by using Deflater. The compression level must be in range [-1,9]</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> base91(deflate(<span class="hljs-string">'aaaaaaaaaaaaaaaabbbbccc'</span>)); + AA+=kaIM|WTt!+wbGAA +</code></pre> +</li> +<li><p><code>inflate(BINARY compressedData)</code> - Returns a decompressed STRING by using Inflater</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> inflate(unbase91(base91(deflate(<span class="hljs-string">'aaaaaaaaaaaaaaaabbbbccc'</span>)))); + aaaaaaaaaaaaaaaabbbbccc +</code></pre> +</li> +</ul> +<h1 id="datetime">Datetime</h1> +<ul> +<li><code>sessionize(long timeInSec, long thresholdInSec [, String subject])</code>- Returns a UUID string of a session.<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> + sessionize(<span class="hljs-keyword">time</span>, <span class="hljs-number">3600</span>, ip_addr) <span class="hljs-keyword">as</span> session_id, + <span class="hljs-keyword">time</span>, ip_addr +<span class="hljs-keyword">FROM</span> ( + <span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">time</span>, ipaddr + <span class="hljs-keyword">FROM</span> weblog + <span class="hljs-keyword">DISTRIBUTE</span> <span class="hljs-keyword">BY</span> ip_addr, <span class="hljs-keyword">time</span> <span class="hljs-keyword">SORT</span> <span class="hljs-keyword">BY</span> ip_addr, <span class="hljs-keyword">time</span> <span class="hljs-keyword">DESC</span> +) t1 +</code></pre> +</li> +</ul> +<h1 id="json">JSON</h1> +<ul> +<li><p><code>from_json(string jsonString, const string returnTypes [, const array<string>|const string columnNames])</code> - Return Hive object.</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> + from_json( + <span class="hljs-string">'{ "person" : { "name" : 
"makoto" , "age" : 37 } }'</span>, + <span class="hljs-string">'struct<name:string,age:int>'</span>, + <span class="hljs-built_in">array</span>(<span class="hljs-string">'person'</span>) + ), + from_json( + <span class="hljs-string">'[0.1,1.1,2.2]'</span>, + <span class="hljs-string">'array<double>'</span> + ), + from_json(to_json( + <span class="hljs-built_in">ARRAY</span>( + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"tokyo"</span>), + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"osaka"</span>) + ) + ),<span class="hljs-string">'array<struct<country:string,city:string>>'</span>), + from_json(to_json( + <span class="hljs-built_in">ARRAY</span>( + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"tokyo"</span>), + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"osaka"</span>) + ), + <span class="hljs-built_in">array</span>(<span class="hljs-string">'city'</span>) + ), <span class="hljs-string">'array<struct<country:string,city:string>>'</span>), + from_json(to_json( + <span class="hljs-built_in">ARRAY</span>( + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"tokyo"</span>), + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"osaka"</span>) + ) + ),<span class="hljs-string">'array<struct<city:string>>'</span>); +</code></pre> +<pre><code> 
{"name":"makoto","age":37} + [0.1,1.1,2.2] + [{"country":"japan","city":"tokyo"},{"country":"japan","city":"osaka"}] + [{"country":"japan","city":"tokyo"},{"country":"japan","city":"osaka"}] + [{"city":"tokyo"},{"city":"osaka"}] +</code></pre></li> +<li><p><code>to_json(ANY object [, const array<string>|const string columnNames])</code> - Returns Json string</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> + NAMED_STRUCT(<span class="hljs-string">"Name"</span>, <span class="hljs-string">"John"</span>, <span class="hljs-string">"age"</span>, <span class="hljs-number">31</span>), + to_json( + NAMED_STRUCT(<span class="hljs-string">"Name"</span>, <span class="hljs-string">"John"</span>, <span class="hljs-string">"age"</span>, <span class="hljs-number">31</span>) + ), + to_json( + NAMED_STRUCT(<span class="hljs-string">"Name"</span>, <span class="hljs-string">"John"</span>, <span class="hljs-string">"age"</span>, <span class="hljs-number">31</span>), + <span class="hljs-built_in">array</span>(<span class="hljs-string">'Name'</span>, <span class="hljs-string">'age'</span>) + ), + to_json( + NAMED_STRUCT(<span class="hljs-string">"Name"</span>, <span class="hljs-string">"John"</span>, <span class="hljs-string">"age"</span>, <span class="hljs-number">31</span>), + <span class="hljs-built_in">array</span>(<span class="hljs-string">'name'</span>, <span class="hljs-string">'age'</span>) + ), + to_json( + NAMED_STRUCT(<span class="hljs-string">"Name"</span>, <span class="hljs-string">"John"</span>, <span class="hljs-string">"age"</span>, <span class="hljs-number">31</span>), + <span class="hljs-built_in">array</span>(<span class="hljs-string">'age'</span>) + ), + to_json( + NAMED_STRUCT(<span class="hljs-string">"Name"</span>, <span class="hljs-string">"John"</span>, <span class="hljs-string">"age"</span>, <span class="hljs-number">31</span>), + <span class="hljs-built_in">array</span>() + ), + to_json( + <span class="hljs-literal">null</span>, + 
<span class="hljs-built_in">array</span>() + ), + to_json( + <span class="hljs-keyword">struct</span>(<span class="hljs-string">"123"</span>, <span class="hljs-string">"456"</span>, <span class="hljs-number">789</span>, <span class="hljs-built_in">array</span>(<span class="hljs-number">314</span>,<span class="hljs-number">007</span>)), + <span class="hljs-built_in">array</span>(<span class="hljs-string">'ti'</span>,<span class="hljs-string">'si'</span>,<span class="hljs-string">'i'</span>,<span class="hljs-string">'bi'</span>) + ), + to_json( + <span class="hljs-keyword">struct</span>(<span class="hljs-string">"123"</span>, <span class="hljs-string">"456"</span>, <span class="hljs-number">789</span>, <span class="hljs-built_in">array</span>(<span class="hljs-number">314</span>,<span class="hljs-number">007</span>)), + <span class="hljs-string">'ti,si,i,bi'</span> + ), + to_json( + <span class="hljs-keyword">struct</span>(<span class="hljs-string">"123"</span>, <span class="hljs-string">"456"</span>, <span class="hljs-number">789</span>, <span class="hljs-built_in">array</span>(<span class="hljs-number">314</span>,<span class="hljs-number">007</span>)) + ), + to_json( + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"tokyo"</span>) + ), + to_json( + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"tokyo"</span>), + <span class="hljs-built_in">array</span>(<span class="hljs-string">'city'</span>) + ), + to_json( + <span class="hljs-built_in">ARRAY</span>( + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"tokyo"</span>), + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span 
class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"osaka"</span>) + ) + ), + to_json( + <span class="hljs-built_in">ARRAY</span>( + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"tokyo"</span>), + NAMED_STRUCT(<span class="hljs-string">"country"</span>, <span class="hljs-string">"japan"</span>, <span class="hljs-string">"city"</span>, <span class="hljs-string">"osaka"</span>) + ), + <span class="hljs-built_in">array</span>(<span class="hljs-string">'city'</span>) + ); +</code></pre> +<pre><code> {"name":"John","age":31} + {"name":"John","age":31} + {"Name":"John","age":31} + {"name":"John","age":31} + {"age":31} + {} + NULL + {"ti":"123","si":"456","i":789,"bi":[314,7]} + {"ti":"123","si":"456","i":789,"bi":[314,7]} + {"col1":"123","col2":"456","col3":789,"col4":[314,7]} + {"country":"japan","city":"tokyo"} + {"city":"tokyo"} + [{"country":"japan","city":"tokyo"},{"country":"japan","city":"osaka"}] + [{"country":"japan","city":"tokyo"},{"country":"japan","city":"osaka"}] +</code></pre></li> +</ul> <h1 id="map">Map</h1> <ul> +<li><p><code>map_exclude_keys(Map<K,V> map, array<K> filteringKeys)</code> - Returns the filtered entries of a map not having specified keys</p> +<pre><code class="lang-sql">SELECT map_exclude_keys(map(1,'one',2,'two',3,'three'),array(2,3)); +{1:"one"} +</code></pre> +</li> <li><p><code>map_get_sum(map<int,float> src, array<int> keys)</code> - Returns sum of values that are retrieved by keys</p> </li> +<li><p><code>map_include_keys(Map<K,V> map, array<K> filteringKeys)</code> - Returns the filtered entries of a map having specified keys</p> +<pre><code class="lang-sql">SELECT map_include_keys(map(1,'one',2,'two',3,'three'),array(2,3)); +{2:"two",3:"three"} +</code></pre> +</li> +<li><p><code>map_index(a, n)</code> - Returns the n-th element of the given array</p> +<pre><code 
class="lang-sql">WITH tmp as ( + <span class="hljs-keyword">SELECT</span> <span class="hljs-string">"one"</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span> + <span class="hljs-keyword">UNION</span> ALL + <span class="hljs-keyword">SELECT</span> <span class="hljs-string">"two"</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">key</span> +) +<span class="hljs-keyword">SELECT</span> map_index(<span class="hljs-keyword">map</span>(<span class="hljs-string">"one"</span>,<span class="hljs-number">1</span>,<span class="hljs-string">"two"</span>,<span class="hljs-number">2</span>),<span class="hljs-keyword">key</span>) +<span class="hljs-keyword">FROM</span> tmp; + +1 +2 +</code></pre> +</li> +<li><p><code>map_key_values(map)</code> - Returns an array of key-value pairs.</p> +<pre><code class="lang-sql">SELECT map_key_values(map("one",1,"two",2)); + +[{"key":"one","value":1},{"key":"two","value":2}] +</code></pre> +</li> <li><p><code>map_tail_n(map SRC, int N)</code> - Returns the last N elements from a sorted array of SRC</p> </li> +<li><p><code>merge_maps(x)</code> - Returns a map which contains the union of an aggregation of maps. 
Note that an existing value of a key can be replaced with the other duplicate key entry.</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> + merge_maps(m) +<span class="hljs-keyword">FROM</span> ( + <span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">map</span>(<span class="hljs-string">'A'</span>,<span class="hljs-number">10</span>,<span class="hljs-string">'B'</span>,<span class="hljs-number">20</span>,<span class="hljs-string">'C'</span>,<span class="hljs-number">30</span>) + <span class="hljs-keyword">UNION</span> ALL + <span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">map</span>(<span class="hljs-string">'A'</span>,<span class="hljs-number">10</span>,<span class="hljs-string">'B'</span>,<span class="hljs-number">20</span>,<span class="hljs-string">'C'</span>,<span class="hljs-number">30</span>) +) t +</code></pre> +</li> <li><p><code>to_map(key, value)</code> - Convert two aggregated columns into a key-value map</p> </li> <li><p><code>to_ordered_map(key, value [, const int k|const boolean reverseOrder=false])</code> - Convert two aggregated columns into an ordered key-value map</p> @@ -2355,39 +2645,6 @@ select array_remove(array("aaa","bbb"),"bbb"); </code></pre> </li> </ul> -<h1 id="bitset">Bitset</h1> -<ul> -<li><p><code>bits_collect(int|long x)</code> - Returns a bitset in array<long></p> -</li> -<li><p><code>bits_or(array<long> b1, array<long> b2, ..)</code> - Returns a logical OR given bitsets</p> -<pre><code class="lang-sql">select unbits(bits_or(to_bits(array(1,4)),to_bits(array(2,3)))); -> [1,2,3,4] -</code></pre> -</li> -<li><p><code>to_bits(int[] indexes)</code> - Returns an bitset representation if the given indexes in long[]</p> -<pre><code class="lang-sql">select to_bits(array(1,2,3,128)); -> [14,-9223372036854775808] -</code></pre> -</li> -<li><p><code>unbits(long[] bitset)</code> - Returns an long array of the give bitset representation</p> -<pre><code class="lang-sql">select 
unbits(to_bits(array(1,4,2,3))); -> [1,2,3,4] -</code></pre> -</li> -</ul> -<h1 id="compression">Compression</h1> -<ul> -<li><p><code>deflate(TEXT data [, const int compressionLevel])</code> - Returns a compressed BINARY object by using Deflater. The compression level must be in range [-1,9]</p> -<pre><code class="lang-sql">select base91(deflate('aaaaaaaaaaaaaaaabbbbccc')); -> AA+=kaIM|WTt!+wbGAA -</code></pre> -</li> -<li><p><code>inflate(BINARY compressedData)</code> - Returns a decompressed STRING by using Inflater</p> -<pre><code class="lang-sql">select inflate(unbase91(base91(deflate('aaaaaaaaaaaaaaaabbbbccc')))); -> aaaaaaaaaaaaaaaabbbbccc -</code></pre> -</li> -</ul> <h1 id="mapreduce">MapReduce</h1> <ul> <li><p><code>distcache_gets(filepath, key, default_value [, parseKey])</code> - Returns map<key_type, value_type>|value_type</p> @@ -2398,9 +2655,10 @@ select array_remove(array("aaa","bbb"),"bbb"); </li> <li><p><code>rowid()</code> - Returns a generated row id of a form {TASK_ID}-{SEQUENCE_NUMBER}</p> </li> -<li><p><code>rownum()</code> - Returns a generated row number in long</p> -<pre><code>returns sprintf(`%d%04d`,sequence,taskId) as long -</code></pre></li> +<li><p><code>rownum()</code> - Returns a generated row number <code>sprintf(</code>%d%04d<code>,sequence,taskId)</code> in long</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">rownum</span>() <span class="hljs-keyword">as</span> <span class="hljs-keyword">rownum</span>, xxx <span class="hljs-keyword">from</span> ... 
+</code></pre> +</li> <li><p><code>taskid()</code> - Returns the value of mapred.task.partition</p> </li> </ul> @@ -2415,42 +2673,83 @@ select array_remove(array("aaa","bbb"),"bbb"); <ul> <li><code>transpose_and_dot(array<number> matrix0_row, array<number> matrix1_row)</code> - Returns dot(matrix0.T, matrix1) as array<array<double>>, shape = (matrix0.#cols, matrix1.#cols)</li> </ul> +<h1 id="sanity-checks">Sanity Checks</h1> +<ul> +<li><p><code>assert(boolean condition)</code> or <em>FUNC</em>(boolean condition, string errMsg)- Throws HiveException if condition is not met</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">FROM</span> stock_price <span class="hljs-keyword">WHERE</span> assert(price > <span class="hljs-number">0.0</span>); +<span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">count</span>(<span class="hljs-number">1</span>) <span class="hljs-keyword">FROM</span> stock_price <span class="hljs-keyword">WHERE</span> assert(price > <span class="hljs-number">0.0</span>, <span class="hljs-string">'price MUST be more than 0.0'</span>) +</code></pre> +</li> +<li><p><code>raise_error()</code> or <em>FUNC</em>(string msg) - Throws an error</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> product_id, price, raise_error(<span class="hljs-string">'Found an invalid record'</span>) <span class="hljs-keyword">FROM</span> xxx <span class="hljs-keyword">WHERE</span> price < <span class="hljs-number">0.0</span> +</code></pre> +</li> +</ul> <h1 id="text-processing">Text processing</h1> <ul> <li><p><code>base91(BINARY bin)</code> - Convert the argument from binary to a BASE91 string</p> -<pre><code class="lang-sql">select base91(deflate('aaaaaaaaaaaaaaaabbbbccc')); -> AA+=kaIM|WTt!+wbGAA +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> base91(deflate(<span 
class="hljs-string">'aaaaaaaaaaaaaaaabbbbccc'</span>)); + AA+=kaIM|WTt!+wbGAA </code></pre> </li> <li><p><code>is_stopword(string word)</code> - Returns whether English stopword or not</p> </li> <li><p><code>normalize_unicode(string str [, string form])</code> - Transforms <code>str</code> with the specified normalization form. The <code>form</code> takes one of NFC (default), NFD, NFKC, or NFKD</p> -<pre><code class="lang-sql">select normalize_unicode('ハンカクカナ','NFKC'); -> ハンカクカナ +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> normalize_unicode(<span class="hljs-string">'ハンカクカナ'</span>,<span class="hljs-string">'NFKC'</span>); + ハンカクカナ -select normalize_unicode('㈱㌧㌦Ⅲ','NFKC'); -> (株)トンドルIII +<span class="hljs-keyword">SELECT</span> normalize_unicode(<span class="hljs-string">'㈱㌧㌦Ⅲ'</span>,<span class="hljs-string">'NFKC'</span>); + (株)トンドルIII </code></pre> </li> <li><p><code>singularize(string word)</code> - Returns singular form of a given English word</p> -<pre><code class="lang-sql">select singularize(lower("Apples")); +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> singularize(<span class="hljs-keyword">lower</span>(<span class="hljs-string">"Apples"</span>)); -> "apple" + "apple" </code></pre> </li> -<li><p><code>split_words(string query [, string regex])</code> - Returns an array<text> containing split strings</p> +<li><p><code>split_words(string query [, string regex])</code> - Returns an array<text> containing split strings</p> </li> <li><p><code>tokenize(string englishText [, boolean toLowerCase])</code> - Returns tokenized words in array<string></p> </li> <li><p><code>unbase91(string)</code> - Convert a BASE91 string to a binary</p> -<pre><code class="lang-sql">select inflate(unbase91(base91(deflate('aaaaaaaaaaaaaaaabbbbccc')))); -> aaaaaaaaaaaaaaaabbbbccc +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> inflate(unbase91(base91(deflate(<span 
class="hljs-string">'aaaaaaaaaaaaaaaabbbbccc'</span>)))); + aaaaaaaaaaaaaaaabbbbccc </code></pre> </li> <li><p><code>word_ngrams(array<string> words, int minSize, int maxSize])</code> - Returns list of n-grams for given words, where <code>minSize &lt;= n &lt;= maxSize</code></p> -<pre><code class="lang-sql">select word_ngrams(tokenize('Machine learning is fun!', true), 1, 2); +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> word_ngrams(tokenize(<span class="hljs-string">'Machine learning is fun!'</span>, <span class="hljs-literal">true</span>), <span class="hljs-number">1</span>, <span class="hljs-number">2</span>); -> ["machine","machine learning","learning","learning is","is","is fun","fun"] + ["machine","machine learning","learning","learning is","is","is fun","fun"] +</code></pre> +</li> +</ul> +<h1 id="timeseries">Timeseries</h1> +<ul> +<li><code>moving_avg(NUMBER value, const int windowSize)</code> - Returns moving average of a time series using a given window<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> moving_avg(x, <span class="hljs-number">3</span>) <span class="hljs-keyword">FROM</span> (<span class="hljs-keyword">SELECT</span> explode(<span class="hljs-built_in">array</span>(<span class="hljs-number">1.0</span>,<span class="hljs-number">2.0</span>,<span class="hljs-number">3.0</span>,<span class="hljs-number">4.0</span>,<span class="hljs-number">5.0</span>,<span class="hljs-number">6.0</span>,<span class="hljs-number">7.0</span>)) <span class="hljs-keyword">as</span> x) series; + 1.0 + 1.5 + 2.0 + 3.0 + 4.0 + 5.0 + 6.0 +</code></pre> +</li> +</ul> +<h1 id="vector">Vector</h1> +<ul> +<li><p><code>vector_add(array<NUMBER> x, array<NUMBER> y)</code> - Perform vector ADD operation.</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> vector_add(<span class="hljs-built_in">array</span>(<span class="hljs-number">1.0</span>,<span class="hljs-number">2.0</span>,<span 
class="hljs-number">3.0</span>), <span class="hljs-built_in">array</span>(<span class="hljs-number">2</span>, <span class="hljs-number">3</span>, <span class="hljs-number">4</span>)); +[3.0,5.0,7.0] +</code></pre> +</li> +<li><p><code>vector_dot(array<NUMBER> x, array<NUMBER> y)</code> - Performs vector dot product.</p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> vector_dot(<span class="hljs-built_in">array</span>(<span class="hljs-number">1.0</span>,<span class="hljs-number">2.0</span>,<span class="hljs-number">3.0</span>),<span class="hljs-built_in">array</span>(<span class="hljs-number">2.0</span>,<span class="hljs-number">3.0</span>,<span class="hljs-number">4.0</span>)); +20 + +<span class="hljs-keyword">SELECT</span> vector_dot(<span class="hljs-built_in">array</span>(<span class="hljs-number">1.0</span>,<span class="hljs-number">2.0</span>,<span class="hljs-number">3.0</span>),<span class="hljs-number">2</span>); +[2.0,4.0,6.0] </code></pre> </li> </ul> @@ -2460,23 +2759,49 @@ select normalize_unicode('㈱㌧㌦Ⅲ','NFKC </li> <li><p><code>each_top_k(int K, Object group, double cmpKey, *)</code> - Returns top-K values (or tail-K values when k is less than 0)</p> </li> -<li><p><code>generate_series(const int|bigint start, const int|bigint end)</code> - Generate a series of values, from start to end. A similar function to PostgreSQL's <code>generate_serics</code>. <a href="http://www.postgresql.org/docs/current/static/functions-srf.html" target="_blank">http://www.postgresql.org/docs/current/static/functions-srf.html</a></p> -<pre><code class="lang-sql"><span class="hljs-keyword">select</span> generate_series(<span class="hljs-number">1</span>,<span class="hljs-number">9</span>); +<li><p><code>generate_series(const int|bigint start, const int|bigint end)</code> - Generate a series of values, from start to end. 
A similar function to PostgreSQL's <a href="http://www.postgresql.org/docs/current/static/functions-srf.html" target="_blank">generate_series</a></p> +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> generate_series(<span class="hljs-number">2</span>,<span class="hljs-number">4</span>); -1 -2 -3 -4 -5 -6 -7 -8 -9 + 2 + 3 + 4 + +<span class="hljs-keyword">SELECT</span> generate_series(<span class="hljs-number">5</span>,<span class="hljs-number">1</span>,<span class="hljs-number">-2</span>); + + 5 + 3 + 1 + +<span class="hljs-keyword">SELECT</span> generate_series(<span class="hljs-number">4</span>,<span class="hljs-number">3</span>); + + (no return) + +<span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">date_add</span>(<span class="hljs-keyword">current_date</span>(),<span class="hljs-keyword">value</span>),<span class="hljs-keyword">value</span> <span class="hljs-keyword">from</span> (<span class="hljs-keyword">SELECT</span> generate_series(<span class="hljs-number">1</span>,<span class="hljs-number">3</span>)) t; + + 2018-04-21 1 + 2018-04-22 2 + 2018-04-23 3 + +WITH input as ( + <span class="hljs-keyword">SELECT</span> <span class="hljs-number">1</span> <span class="hljs-keyword">as</span> c1, <span class="hljs-number">10</span> <span class="hljs-keyword">as</span> c2, <span class="hljs-number">3</span> <span class="hljs-keyword">as</span> step + <span class="hljs-keyword">UNION</span> ALL + <span class="hljs-keyword">SELECT</span> <span class="hljs-number">10</span>, <span class="hljs-number">2</span>, <span class="hljs-number">-3</span> +) +<span class="hljs-keyword">SELECT</span> generate_series(c1, c2, step) <span class="hljs-keyword">as</span> series +<span class="hljs-keyword">FROM</span> <span class="hljs-keyword">input</span>; + + 1 + 4 + 7 + 10 + 10 + 7 + 4 </code></pre> </li> <li><p><code>try_cast(ANY src, const string typeName)</code> - Explicitly cast a value as a type. 
Returns null if cast fails.</p> -<pre><code class="lang-sql">Usage: <span class="hljs-keyword">select</span> <span class="hljs-keyword">try_cast</span>(<span class="hljs-built_in">array</span>(<span class="hljs-number">1.0</span>,<span class="hljs-number">2.0</span>,<span class="hljs-number">3.0</span>), <span class="hljs-string">'array<string>'</span>) - <span class="hljs-keyword">select</span> <span class="hljs-keyword">try_cast</span>(<span class="hljs-keyword">map</span>(<span class="hljs-string">'A'</span>,<span class="hljs-number">10</span>,<span class="hljs-string">'B'</span>,<span class="hljs-number">20</span>,<span class="hljs-string">'C'</span>,<span class="hljs-number">30</span>), <span class="hljs-string">'map<string,double>'</span>) +<pre><code class="lang-sql"><span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">try_cast</span>(<span class="hljs-built_in">array</span>(<span class="hljs-number">1.0</span>,<span class="hljs-number">2.0</span>,<span class="hljs-number">3.0</span>), <span class="hljs-string">'array<string>'</span>) +<span class="hljs-keyword">SELECT</span> <span class="hljs-keyword">try_cast</span>(<span class="hljs-keyword">map</span>(<span class="hljs-string">'A'</span>,<span class="hljs-number">10</span>,<span class="hljs-string">'B'</span>,<span class="hljs-number">20</span>,<span class="hljs-string">'C'</span>,<span class="hljs-number">30</span>), <span class="hljs-string">'map<string,double>'</span>) </code></pre> </li> <li><p><code>x_rank(KEY)</code> - Generates a pseudo sequence number starting from 1 for each key</p> @@ -2537,7 +2862,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"List of Generic Hivemall Functions","level":"2.1","depth":1,"next":{"title":"Efficient Top-K Query 
Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"previous":{"title":"Map-side join causes ClassCastException on Tez","level":"1.6.5","depth":2,"path":"troubleshooting/mapjoin_classcastex.md","ref":"troubleshooting/mapjoin_classcastex.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{" url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/eb ook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > 
h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"misc/generic_funcs.md","mtime":"2018-04-25T08:11:03.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-04-26T03:55:31.199Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"List of Generic Hivemall Functions","level":"2.1","depth":1,"next":{"title":"Efficient Top-K Query Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"previous":{"title":"Map-side join causes ClassCastException on Tez","level":"1.6.5","depth":2,"path":"troubleshooting/mapjoin_classcastex.md","ref":"troubleshooting/mapjoin_classcastex.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{" 
url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/eb ook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"misc/generic_funcs.md","mtime":"2018-06-06T08:56:30.932Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-06-06T09:01:20.330Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/e1e876db/userguide/misc/prediction.html 
---------------------------------------------------------------------- diff --git a/userguide/misc/prediction.html b/userguide/misc/prediction.html index eb9754c..fba9294 100644 --- a/userguide/misc/prediction.html +++ b/userguide/misc/prediction.html @@ -2463,7 +2463,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"How Prediction Works","level":"5.1","depth":1,"next":{"title":"Binary Classification","level":"6.1","depth":1,"path":"binaryclass/general.md","ref":"binaryclass/general.md","articles":[]},"previous":{"title":"Logistic Regression data generation","level":"4.5.1","depth":2,"path":"eval/lr_datagen.md","ref":"eval/lr_datagen.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"s 
plitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true}," anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"misc/prediction.md","mtime":"2018-02-01T07:43:38.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-04-26T03:55:31.199Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"How Prediction Works","level":"5.1","depth":1,"next":{"title":"Binary 
Classification","level":"6.1","depth":1,"path":"binaryclass/general.md","ref":"binaryclass/general.md","articles":[]},"previous":{"title":"Logistic Regression data generation","level":"4.5.1","depth":2,"path":"eval/lr_datagen.md","ref":"eval/lr_datagen.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"s plitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true}," anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > 
h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"misc/prediction.md","mtime":"2017-12-11T08:48:01.135Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-06-06T09:01:20.330Z"},"basePath":"..","book":{"language":""}}); }); </script> </div> http://git-wip-us.apache.org/repos/asf/incubator-hivemall-site/blob/e1e876db/userguide/misc/tokenizer.html ---------------------------------------------------------------------- diff --git a/userguide/misc/tokenizer.html b/userguide/misc/tokenizer.html index f289e11..d445f24 100644 --- a/userguide/misc/tokenizer.html +++ b/userguide/misc/tokenizer.html @@ -2342,7 +2342,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda <script> var gitbook = gitbook || []; gitbook.push(function() { - gitbook.page.hasChanged({"page":{"title":"Text Tokenizer","level":"2.3","depth":1,"next":{"title":"Approximate Aggregate Functions","level":"2.4","depth":1,"path":"misc/approx.md","ref":"misc/approx.md","articles":[]},"previous":{"title":"Efficient Top-K Query 
Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"down loadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2, h3,*:not(.callout) > 
h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"misc/tokenizer.md","mtime":"2018-02-20T20:51:34.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-04-26T03:55:31.199Z"},"basePath":"..","book":{"language":""}}); + gitbook.page.hasChanged({"page":{"title":"Text Tokenizer","level":"2.3","depth":1,"next":{"title":"Approximate Aggregate Functions","level":"2.4","depth":1,"path":"misc/approx.md","ref":"misc/approx.md","articles":[]},"previous":{"title":"Efficient Top-K Query Processing","level":"2.2","depth":1,"path":"misc/topk.md","ref":"misc/topk.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","sitemap","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"down 
loadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"sitemap":{"hostname":"http://hivemall.incubator.apache.org/"},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2, h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"http://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"misc/tokenizer.md","mtime":"2017-12-26T05:23:21.742Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-06-06T09:01:20.330Z"},"basePath":"..","book":{"language":""}}); }); </script> </div>
