Added: tajo/site/docs/devel/table_management/csv.html
URL: 
http://svn.apache.org/viewvc/tajo/site/docs/devel/table_management/csv.html?rev=1644656&view=auto
==============================================================================
--- tajo/site/docs/devel/table_management/csv.html (added)
+++ tajo/site/docs/devel/table_management/csv.html Thu Dec 11 14:41:20 2014
@@ -0,0 +1,333 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  <title>CSV (TextFile) &mdash; Apache Tajo 0.8.0 documentation</title>
+  
+
+  
+  
+
+  
+  <link 
href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700'
 rel='stylesheet' type='text/css'>
+
+  
+  
+    
+
+  
+
+  
+  
+    <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+  
+
+  
+    <link rel="top" title="Apache Tajo 0.8.0 documentation" 
href="../index.html"/>
+        <link rel="up" title="File Formats" href="file_formats.html"/>
+        <link rel="next" title="RCFile" href="rcfile.html"/>
+        <link rel="prev" title="File Formats" href="file_formats.html"/> 
+
+  
+  <script 
src="https://cdnjs.cloudflare.com/ajax/libs/modernizr/2.6.2/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-nav-search">
+        <a href="../index.html" class="fa fa-home"> Apache Tajo</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../search.html" 
method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation">
+        
+        
+            <ul class="current">
+<li class="toctree-l1"><a class="reference internal" 
href="../introduction.html">Introduction</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../getting_started.html">Getting Started</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/prerequisites.html">Prerequisites</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/downloading_source.html">Download and unpack the source 
code</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/building.html">Build source code</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/local_setup.html">Setting up a local Tajo 
cluster</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/first_query.html">First query execution</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../configuration.html">Configuration</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/preliminary.html">Preliminary</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/cluster_setup.html">Cluster Setup</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/tajo_master_configuration.html">Tajo Master 
Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/worker_configuration.html">Worker Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/catalog_configuration.html">Catalog 
Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/configuration_defaults.html">Configuration 
Defaults</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/ha_configuration.html">High Availability for 
TajoMaster</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="../tsql.html">Tajo 
Shell (TSQL)</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/meta_command.html">Meta Commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/dfs_command.html">Executing HDFS commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/variables.html">Session Variables</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/admin_command.html">Administration Commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/intro.html">Introduction to TSQL</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/single_command.html">Executing a single command</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/execute_file.html">Executing Queries from Files</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/background_command.html">Executing as background process</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../sql_language.html">SQL Language</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/data_model.html">Data Model</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/ddl.html">Data Definition Language</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/insert.html">INSERT (OVERWRITE) INTO</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/queries.html">Queries</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/sql_expression.html">SQL Expressions</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/predicates.html">Predicates</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../functions.html">Functions</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/math_func_and_operators.html">Math Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/string_func_and_operators.html">String Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/datetime_func_and_operators.html">DateTime Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/network_func_and_operators.html">Network Functions and 
Operators</a></li>
+</ul>
+</li>
+<li class="toctree-l1 current"><a class="reference internal" 
href="../table_management.html">Table Management</a><ul class="current">
+<li class="toctree-l2"><a class="reference internal" 
href="table_overview.html">Overview of Tajo Tables</a></li>
+<li class="toctree-l2 current"><a class="reference internal" 
href="file_formats.html">File Formats</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="compression.html">Compression</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../table_partitioning.html">Table Partitioning</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/intro_to_partitioning.html">Introduction to 
Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/column_partitioning.html">Column Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/range_partitioning.html">Range Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/hash_partitioning.html">Hash Partitioning</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../backup_and_restore.html">Backup and Restore</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../backup_and_restore/catalog.html">Backup and Restore Catalog</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../hcatalog_integration.html">HCatalog Integration</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../jdbc_driver.html">Tajo JDBC Driver</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#how-to-get-jdbc-driver">How to get JDBC 
driver</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#setting-the-classpath">Setting the CLASSPATH</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#an-example-jdbc-client">An Example JDBC 
Client</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#faq">FAQ</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../tajo_client_api.html">Tajo Client API</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../faq.html">FAQ</a></li>
+</ul>
+
+        
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../index.html">Apache Tajo</a>
+      </nav>
+
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../index.html">Docs</a> &raquo;</li>
+      
+          <li><a href="../table_management.html">Table Management</a> 
&raquo;</li>
+      
+          <li><a href="file_formats.html">File Formats</a> &raquo;</li>
+      
+    <li>CSV (TextFile)</li>
+      <li class="wy-breadcrumbs-aside">
+        
+          <a href="../_sources/table_management/csv.txt" rel="nofollow"> View 
page source</a>
+        
+      </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            
+  <div class="section" id="csv-textfile">
+<h1>CSV (TextFile)<a class="headerlink" href="#csv-textfile" title="Permalink 
to this headline">¶</a></h1>
+<p>A character-separated values (CSV) file represents a tabular data set 
consisting of rows and columns.
+Each row is a plain-text line. A line is usually terminated by a line 
feed character <tt class="docutils literal"><span class="pre">\n</span></tt> or 
carriage-return <tt class="docutils literal"><span class="pre">\r</span></tt>.
+The line feed <tt class="docutils literal"><span class="pre">\n</span></tt> is 
the default delimiter in Tajo. Each record consists of multiple fields, 
separated by
+some other character or string, most commonly a literal vertical bar <tt 
class="docutils literal"><span class="pre">|</span></tt>, comma <tt 
class="docutils literal"><span class="pre">,</span></tt> or tab <tt 
class="docutils literal"><span class="pre">\t</span></tt>.
+The vertical bar is used as the default field delimiter in Tajo.</p>
+<div class="section" id="how-to-create-a-csv-table">
+<h2>How to Create a CSV Table ?<a class="headerlink" 
href="#how-to-create-a-csv-table" title="Permalink to this headline">¶</a></h2>
+<p>If you are not familiar with the <tt class="docutils literal"><span 
class="pre">CREATE</span> <span class="pre">TABLE</span></tt> statement, please 
refer to the Data Definition Language <a class="reference internal" 
href="../sql_language/ddl.html"><em>Data Definition Language</em></a>.</p>
+<p>In order to specify a certain file format for your table, you need to use 
the <tt class="docutils literal"><span class="pre">USING</span></tt> clause in 
your <tt class="docutils literal"><span class="pre">CREATE</span> <span 
class="pre">TABLE</span></tt>
+statement. Below is an example statement for creating a table using CSV 
files.</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">TABLE</span>
+ <span class="n">table1</span> <span class="p">(</span>
+   <span class="n">id</span> <span class="nb">int</span><span 
class="p">,</span>
+   <span class="n">name</span> <span class="nb">text</span><span 
class="p">,</span>
+   <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+   <span class="k">type</span> <span class="nb">text</span>
+ <span class="p">)</span> <span class="k">USING</span> <span 
class="n">CSV</span><span class="p">;</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="physical-properties">
+<h2>Physical Properties<a class="headerlink" href="#physical-properties" 
title="Permalink to this headline">¶</a></h2>
+<p>Some table storage formats provide parameters for enabling or disabling 
features and adjusting physical parameters.
+The <tt class="docutils literal"><span class="pre">WITH</span></tt> clause in 
the CREATE TABLE statement allows users to set those parameters.</p>
+<p>Now, the CSV storage format provides the following physical properties.</p>
+<ul class="simple">
+<li><tt class="docutils literal"><span class="pre">text.delimiter</span></tt>: 
delimiter character. <tt class="docutils literal"><span 
class="pre">|</span></tt> or <tt class="docutils literal"><span 
class="pre">\u0001</span></tt> is usually used, and the default field delimiter 
is <tt class="docutils literal"><span class="pre">|</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">text.null</span></tt>: NULL 
character. The default NULL character is an empty string <tt class="docutils 
literal"><span class="pre">''</span></tt>. Hive&#8217;s default NULL character 
is <tt class="docutils literal"><span class="pre">'\\N'</span></tt>.</li>
+<li><tt class="docutils literal"><span 
class="pre">compression.codec</span></tt>: Compression codec. You can enable 
compression and specify the compression algorithm used to compress files. 
The compression codec name should be the fully qualified name of a class 
that implements <a class="reference external" 
href="https://hadoop.apache.org/docs/current/api/org/apache/hadoop/io/compress/CompressionCodec.html">org.apache.hadoop.io.compress.CompressionCodec</a>.
 By default, compression is disabled.</li>
+<li><tt class="docutils literal"><span class="pre">csvfile.serde</span></tt> 
(deprecated): custom (De)serializer class. <tt class="docutils literal"><span 
class="pre">org.apache.tajo.storage.TextSerializerDeserializer</span></tt> is 
the default (De)serializer class.</li>
+<li><tt class="docutils literal"><span class="pre">timezone</span></tt>: the 
time zone that the table uses for writing. When table rows are read or 
written, <tt class="docutils literal"><span class="pre">`timestamp`</span></tt> 
and <tt class="docutils literal"><span class="pre">`time`</span></tt> column 
values are adjusted by this timezone if it is set. Time zone can be an 
abbreviation form like &#8216;PST&#8217; or &#8216;DST&#8217;. Also, it accepts 
an offset-based form like &#8216;UTC+9&#8217; or a location-based form like 
&#8216;Asia/Seoul&#8217;.</li>
+<li><tt class="docutils literal"><span 
class="pre">text.error-tolerance.max-num</span></tt>: the maximum number of 
permissible parsing errors. This value should be an integer value. By default, 
<tt class="docutils literal"><span 
class="pre">text.error-tolerance.max-num</span></tt> is <tt class="docutils 
literal"><span class="pre">0</span></tt>. According to the value, parsing 
errors will be handled in different ways.
+<ul><li>If <tt class="docutils literal"><span 
class="pre">text.error-tolerance.max-num</span> <span class="pre">&lt;</span> 
<span class="pre">0</span></tt>, all parsing errors are ignored.</li>
+<li>If <tt class="docutils literal"><span 
class="pre">text.error-tolerance.max-num</span> <span class="pre">==</span> 
<span class="pre">0</span></tt>, no parsing error is allowed. If any error 
occurs, the query will fail. (default)</li>
+<li>If <tt class="docutils literal"><span 
class="pre">text.error-tolerance.max-num</span> <span class="pre">&gt;</span> 
<span class="pre">0</span></tt>, the given number of parsing errors in each 
task will be permissible.</li></ul></li>
+</ul>
+<p>The following example is to set a custom field delimiter, NULL character, 
and compression codec:</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">TABLE</span> <span 
class="n">table1</span> <span class="p">(</span>
+ <span class="n">id</span> <span class="nb">int</span><span class="p">,</span>
+ <span class="n">name</span> <span class="nb">text</span><span 
class="p">,</span>
+ <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+ <span class="k">type</span> <span class="nb">text</span>
+<span class="p">)</span> <span class="k">USING</span> <span 
class="n">CSV</span> <span class="k">WITH</span><span class="p">(</span><span 
class="s1">&#39;text.delimiter&#39;</span><span class="o">=</span><span 
class="s1">&#39;\u0001&#39;</span><span class="p">,</span>
+                 <span class="s1">&#39;text.null&#39;</span><span 
class="o">=</span><span class="s1">&#39;\\N&#39;</span><span class="p">,</span>
+                 <span class="s1">&#39;compression.codec&#39;</span><span 
class="o">=</span><span 
class="s1">&#39;org.apache.hadoop.io.compress.SnappyCodec&#39;</span><span 
class="p">);</span>
+</pre></div>
+</div>
+<div class="admonition warning">
+<p class="first admonition-title">Warning</p>
+<p class="last">Be careful when using <tt class="docutils literal"><span 
class="pre">\n</span></tt> as the field delimiter because CSV uses <tt 
class="docutils literal"><span class="pre">\n</span></tt> as the line delimiter.
+At the moment, Tajo does not provide a way to specify the line delimiter.</p>
+</div>
+</div>
+<div class="section" id="custom-de-serializer">
+<h2>Custom (De)serializer<a class="headerlink" href="#custom-de-serializer" 
title="Permalink to this headline">¶</a></h2>
+<p>The CSV storage format not only provides reading and writing interfaces for 
CSV data but also allows users to process custom
+plain-text file formats with user-defined (De)serializer classes.
+For example, with custom (de)serializers, Tajo can process JSON file formats 
or any specialized plain-text file formats.</p>
+<p>In order to specify a custom (De)serializer, set a physical property <tt 
class="docutils literal"><span class="pre">csvfile.serde</span></tt>.
+The property value should be a fully qualified class name.</p>
+<p>For example:</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">TABLE</span> <span 
class="n">table1</span> <span class="p">(</span>
+ <span class="n">id</span> <span class="nb">int</span><span class="p">,</span>
+ <span class="n">name</span> <span class="nb">text</span><span 
class="p">,</span>
+ <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+ <span class="k">type</span> <span class="nb">text</span>
+<span class="p">)</span> <span class="k">USING</span> <span 
class="n">CSV</span> <span class="k">WITH</span> <span class="p">(</span><span 
class="s1">&#39;csvfile.serde&#39;</span><span class="o">=</span><span 
class="s1">&#39;org.my.storage.CustomSerializerDeserializer&#39;</span><span 
class="p">)</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="null-value-handling-issues">
+<h2>Null Value Handling Issues<a class="headerlink" 
href="#null-value-handling-issues" title="Permalink to this 
headline">¶</a></h2>
+<p>By default, the NULL character in CSV files is an empty string <tt 
class="docutils literal"><span class="pre">''</span></tt>.
+In other words, an empty field is basically recognized as a NULL value in Tajo.
+However, if the data type of a field is <tt class="docutils literal"><span 
class="pre">TEXT</span></tt>, an empty field is recognized as a string value 
<tt class="docutils literal"><span class="pre">''</span></tt> instead of a NULL 
value.
+Besides, you can also use your own NULL character by specifying a physical 
property <tt class="docutils literal"><span 
class="pre">text.null</span></tt>.</p>
+</div>
+<div class="section" id="compatibility-issues-with-apache-hive">
+<h2>Compatibility Issues with Apache Hive™<a class="headerlink" 
href="#compatibility-issues-with-apache-hive" title="Permalink to this 
headline">¶</a></h2>
+<p>CSV files generated in Tajo can be processed directly by Apache Hive™ 
without further processing.
+In this section, we explain some compatibility issues for users who use both 
Hive and Tajo.</p>
+<p>If you set a custom field delimiter, the CSV tables cannot be directly used 
in Hive.
+In order to specify the custom field delimiter in Hive, you need to use the <tt 
class="docutils literal"><span class="pre">ROW</span> <span 
class="pre">FORMAT</span> <span class="pre">DELIMITED</span> <span 
class="pre">FIELDS</span> <span class="pre">TERMINATED</span> <span 
class="pre">BY</span></tt>
+clause in Hive&#8217;s <tt class="docutils literal"><span 
class="pre">CREATE</span> <span class="pre">TABLE</span></tt> statement as 
follows:</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">TABLE</span> <span 
class="n">table1</span> <span class="p">(</span><span class="n">id</span> <span 
class="nb">int</span><span class="p">,</span> <span class="n">name</span> <span 
class="n">string</span><span class="p">,</span> <span class="n">score</span> 
<span class="nb">float</span><span class="p">,</span> <span 
class="k">type</span> <span class="n">string</span><span class="p">)</span>
+<span class="k">ROW</span> <span class="n">FORMAT</span> <span 
class="n">DELIMITED</span> <span class="n">FIELDS</span> <span 
class="n">TERMINATED</span> <span class="k">BY</span> <span 
class="s1">&#39;|&#39;</span>
+<span class="n">STORED</span> <span class="k">AS</span> <span 
class="n">TEXTFILE</span>
+</pre></div>
+</div>
+<p>To the best of our knowledge, there is no way to specify a custom NULL 
character in Hive.</p>
+</div>
+</div>
+
+
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer 
navigation">
+      
+        <a href="rcfile.html" class="btn btn-neutral float-right" 
title="RCFile">Next <span class="fa fa-arrow-circle-right"></span></a>
+      
+      
+        <a href="file_formats.html" class="btn btn-neutral" title="File 
Formats"><span class="fa fa-arrow-circle-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <p>
+        &copy; Copyright 2014, Apache Tajo Team.
+    </p>
+  </div>
+
+  <a href="https://github.com/snide/sphinx_rtd_theme">Sphinx theme</a> 
provided by <a href="https://readthedocs.org">Read the Docs</a>
+</footer>
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+  
+
+
+  
+
+    <script type="text/javascript">
+        var DOCUMENTATION_OPTIONS = {
+            URL_ROOT:'../',
+            VERSION:'0.8.0',
+            COLLAPSE_INDEX:false,
+            FILE_SUFFIX:'.html',
+            HAS_SOURCE:  true
+        };
+    </script>
+      <script type="text/javascript" src="../_static/jquery.js"></script>
+      <script type="text/javascript" src="../_static/underscore.js"></script>
+      <script type="text/javascript" src="../_static/doctools.js"></script>
+
+  
+
+  
+  
+    <script type="text/javascript" src="../_static/js/theme.js"></script>
+  
+
+  
+  
+  <script type="text/javascript">
+      jQuery(function () {
+          SphinxRtdTheme.StickyNav.enable();
+      });
+  </script>
+   
+
+</body>
+</html>
\ No newline at end of file

Added: tajo/site/docs/devel/table_management/file_formats.html
URL: 
http://svn.apache.org/viewvc/tajo/site/docs/devel/table_management/file_formats.html?rev=1644656&view=auto
==============================================================================
--- tajo/site/docs/devel/table_management/file_formats.html (added)
+++ tajo/site/docs/devel/table_management/file_formats.html Thu Dec 11 14:41:20 
2014
@@ -0,0 +1,249 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  <title>File Formats &mdash; Apache Tajo 0.8.0 documentation</title>
+  
+
+  
+  
+
+  
+  <link 
href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700'
 rel='stylesheet' type='text/css'>
+
+  
+  
+    
+
+  
+
+  
+  
+    <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+  
+
+  
+    <link rel="top" title="Apache Tajo 0.8.0 documentation" 
href="../index.html"/>
+        <link rel="up" title="Table Management" 
href="../table_management.html"/>
+        <link rel="next" title="CSV (TextFile)" href="csv.html"/>
+        <link rel="prev" title="Overview of Tajo Tables" 
href="table_overview.html"/> 
+
+  
+  <script 
src="https://cdnjs.cloudflare.com/ajax/libs/modernizr/2.6.2/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-nav-search">
+        <a href="../index.html" class="fa fa-home"> Apache Tajo</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../search.html" 
method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation">
+        
+        
+            <ul class="current">
+<li class="toctree-l1"><a class="reference internal" 
href="../introduction.html">Introduction</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../getting_started.html">Getting Started</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/prerequisites.html">Prerequisites</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/downloading_source.html">Download and unpack the source 
code</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/building.html">Build source code</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/local_setup.html">Setting up a local Tajo 
cluster</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/first_query.html">First query execution</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../configuration.html">Configuration</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/preliminary.html">Preliminary</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/cluster_setup.html">Cluster Setup</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/tajo_master_configuration.html">Tajo Master 
Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/worker_configuration.html">Worker Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/catalog_configuration.html">Catalog 
Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/configuration_defaults.html">Configuration 
Defaults</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/ha_configuration.html">High Availability for 
TajoMaster</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="../tsql.html">Tajo 
Shell (TSQL)</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/meta_command.html">Meta Commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/dfs_command.html">Executing HDFS commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/variables.html">Session Variables</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/admin_command.html">Administration Commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/intro.html">Introduction to TSQL</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/single_command.html">Executing a single command</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/execute_file.html">Executing Queries from Files</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/background_command.html">Executing as background process</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../sql_language.html">SQL Language</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/data_model.html">Data Model</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/ddl.html">Data Definition Language</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/insert.html">INSERT (OVERWRITE) INTO</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/queries.html">Queries</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/sql_expression.html">SQL Expressions</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/predicates.html">Predicates</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../functions.html">Functions</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/math_func_and_operators.html">Math Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/string_func_and_operators.html">String Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/datetime_func_and_operators.html">DateTime Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/network_func_and_operators.html">Network Functions and 
Operators</a></li>
+</ul>
+</li>
+<li class="toctree-l1 current"><a class="reference internal" 
href="../table_management.html">Table Management</a><ul class="current">
+<li class="toctree-l2"><a class="reference internal" 
href="table_overview.html">Overview of Tajo Tables</a></li>
+<li class="toctree-l2 current"><a class="current reference internal" 
href="">File Formats</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="compression.html">Compression</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../table_partitioning.html">Table Partitioning</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/intro_to_partitioning.html">Introduction to 
Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/column_partitioning.html">Column Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/range_partitioning.html">Range Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/hash_partitioning.html">Hash Partitioning</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../backup_and_restore.html">Backup and Restore</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../backup_and_restore/catalog.html">Backup and Restore Catalog</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../hcatalog_integration.html">HCatalog Integration</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../jdbc_driver.html">Tajo JDBC Driver</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#how-to-get-jdbc-driver">How to get JDBC 
driver</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#setting-the-classpath">Setting the CLASSPATH</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#an-example-jdbc-client">An Example JDBC 
Client</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#faq">FAQ</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../tajo_client_api.html">Tajo Client API</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../faq.html">FAQ</a></li>
+</ul>
+
+        
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../index.html">Apache Tajo</a>
+      </nav>
+
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../index.html">Docs</a> &raquo;</li>
+      
+          <li><a href="../table_management.html">Table Management</a> 
&raquo;</li>
+      
+    <li>File Formats</li>
+      <li class="wy-breadcrumbs-aside">
+        
+          <a href="../_sources/table_management/file_formats.txt" 
rel="nofollow"> View page source</a>
+        
+      </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            
+  <div class="section" id="file-formats">
+<h1>File Formats<a class="headerlink" href="#file-formats" title="Permalink to 
this headline">¶</a></h1>
+<p>Currently, Tajo provides four file formats as follows:</p>
+<div class="toctree-wrapper compound">
+<ul>
+<li class="toctree-l1"><a class="reference internal" href="csv.html">CSV 
(TextFile)</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="rcfile.html">RCFile</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="parquet.html">Parquet</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="sequencefile.html">SequenceFile</a></li>
+</ul>
+</div>
+</div>
+
+
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer 
navigation">
+      
+        <a href="csv.html" class="btn btn-neutral float-right" title="CSV 
(TextFile)">Next <span class="fa fa-arrow-circle-right"></span></a>
+      
+      
+        <a href="table_overview.html" class="btn btn-neutral" title="Overview 
of Tajo Tables"><span class="fa fa-arrow-circle-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <p>
+        &copy; Copyright 2014, Apache Tajo Team.
+    </p>
+  </div>
+
+  <a href="https://github.com/snide/sphinx_rtd_theme">Sphinx theme</a> 
provided by <a href="https://readthedocs.org">Read the Docs</a>
+</footer>
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+  
+
+
+  
+
+    <script type="text/javascript">
+        var DOCUMENTATION_OPTIONS = {
+            URL_ROOT:'../',
+            VERSION:'0.8.0',
+            COLLAPSE_INDEX:false,
+            FILE_SUFFIX:'.html',
+            HAS_SOURCE:  true
+        };
+    </script>
+      <script type="text/javascript" src="../_static/jquery.js"></script>
+      <script type="text/javascript" src="../_static/underscore.js"></script>
+      <script type="text/javascript" src="../_static/doctools.js"></script>
+
+  
+
+  
+  
+    <script type="text/javascript" src="../_static/js/theme.js"></script>
+  
+
+  
+  
+  <script type="text/javascript">
+      jQuery(function () {
+          SphinxRtdTheme.StickyNav.enable();
+      });
+  </script>
+   
+
+</body>
+</html>
\ No newline at end of file

Added: tajo/site/docs/devel/table_management/parquet.html
URL: 
http://svn.apache.org/viewvc/tajo/site/docs/devel/table_management/parquet.html?rev=1644656&view=auto
==============================================================================
--- tajo/site/docs/devel/table_management/parquet.html (added)
+++ tajo/site/docs/devel/table_management/parquet.html Thu Dec 11 14:41:20 2014
@@ -0,0 +1,278 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  <title>Parquet &mdash; Apache Tajo 0.8.0 documentation</title>
+  
+
+  
+  
+
+  
+  <link 
href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700'
 rel='stylesheet' type='text/css'>
+
+  
+  
+    
+
+  
+
+  
+  
+    <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+  
+
+  
+    <link rel="top" title="Apache Tajo 0.8.0 documentation" 
href="../index.html"/>
+        <link rel="up" title="File Formats" href="file_formats.html"/>
+        <link rel="next" title="SequenceFile" href="sequencefile.html"/>
+        <link rel="prev" title="RCFile" href="rcfile.html"/> 
+
+  
+  <script 
src="https://cdnjs.cloudflare.com/ajax/libs/modernizr/2.6.2/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-nav-search">
+        <a href="../index.html" class="fa fa-home"> Apache Tajo</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../search.html" 
method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation">
+        
+        
+            <ul class="current">
+<li class="toctree-l1"><a class="reference internal" 
href="../introduction.html">Introduction</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../getting_started.html">Getting Started</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/prerequisites.html">Prerequisites</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/downloading_source.html">Download and unpack the source 
code</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/building.html">Build source code</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/local_setup.html">Setting up a local Tajo 
cluster</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/first_query.html">First query execution</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../configuration.html">Configuration</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/preliminary.html">Preliminary</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/cluster_setup.html">Cluster Setup</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/tajo_master_configuration.html">Tajo Master 
Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/worker_configuration.html">Worker Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/catalog_configuration.html">Catalog 
Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/configuration_defaults.html">Configuration 
Defaults</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/ha_configuration.html">High Availability for 
TajoMaster</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="../tsql.html">Tajo 
Shell (TSQL)</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/meta_command.html">Meta Commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/dfs_command.html">Executing HDFS commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/variables.html">Session Variables</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/admin_command.html">Administration Commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/intro.html">Introduction to TSQL</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/single_command.html">Executing a single command</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/execute_file.html">Executing Queries from Files</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/background_command.html">Executing as background process</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../sql_language.html">SQL Language</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/data_model.html">Data Model</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/ddl.html">Data Definition Language</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/insert.html">INSERT (OVERWRITE) INTO</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/queries.html">Queries</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/sql_expression.html">SQL Expressions</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/predicates.html">Predicates</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../functions.html">Functions</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/math_func_and_operators.html">Math Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/string_func_and_operators.html">String Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/datetime_func_and_operators.html">DateTime Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/network_func_and_operators.html">Network Functions and 
Operators</a></li>
+</ul>
+</li>
+<li class="toctree-l1 current"><a class="reference internal" 
href="../table_management.html">Table Management</a><ul class="current">
+<li class="toctree-l2"><a class="reference internal" 
href="table_overview.html">Overview of Tajo Tables</a></li>
+<li class="toctree-l2 current"><a class="reference internal" 
href="file_formats.html">File Formats</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="compression.html">Compression</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../table_partitioning.html">Table Partitioning</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/intro_to_partitioning.html">Introduction to 
Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/column_partitioning.html">Column Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/range_partitioning.html">Range Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/hash_partitioning.html">Hash Partitioning</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../backup_and_restore.html">Backup and Restore</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../backup_and_restore/catalog.html">Backup and Restore Catalog</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../hcatalog_integration.html">HCatalog Integration</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../jdbc_driver.html">Tajo JDBC Driver</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#how-to-get-jdbc-driver">How to get JDBC 
driver</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#setting-the-classpath">Setting the CLASSPATH</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#an-example-jdbc-client">An Example JDBC 
Client</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#faq">FAQ</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../tajo_client_api.html">Tajo Client API</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../faq.html">FAQ</a></li>
+</ul>
+
+        
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../index.html">Apache Tajo</a>
+      </nav>
+
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../index.html">Docs</a> &raquo;</li>
+      
+          <li><a href="../table_management.html">Table Management</a> 
&raquo;</li>
+      
+          <li><a href="file_formats.html">File Formats</a> &raquo;</li>
+      
+    <li>Parquet</li>
+      <li class="wy-breadcrumbs-aside">
+        
+          <a href="../_sources/table_management/parquet.txt" rel="nofollow"> 
View page source</a>
+        
+      </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            
+  <div class="section" id="parquet">
+<h1>Parquet<a class="headerlink" href="#parquet" title="Permalink to this 
headline">¶</a></h1>
+<p>Parquet is a columnar storage format for Hadoop. Parquet is designed to 
make the advantages of compressed,
+efficient columnar data representation available to any project in the Hadoop 
ecosystem,
+regardless of the choice of data processing framework, data model, or 
programming language.
+For more details, please refer to <a class="reference external" 
href="http://parquet.io/">Parquet File Format</a>.</p>
+<div class="section" id="how-to-create-a-parquet-table">
+<h2>How to Create a Parquet Table?<a class="headerlink" 
href="#how-to-create-a-parquet-table" title="Permalink to this 
headline">¶</a></h2>
+<p>If you are not familiar with the <tt class="docutils literal"><span 
class="pre">CREATE</span> <span class="pre">TABLE</span></tt> statement, please 
refer to <a class="reference internal" 
href="../sql_language/ddl.html"><em>Data Definition Language</em></a>.</p>
+<p>In order to specify a certain file format for your table, you need to use 
the <tt class="docutils literal"><span class="pre">USING</span></tt> clause in 
your <tt class="docutils literal"><span class="pre">CREATE</span> <span 
class="pre">TABLE</span></tt>
+statement. Below is an example statement for creating a table using parquet 
files.</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">TABLE</span> <span 
class="n">table1</span> <span class="p">(</span>
+  <span class="n">id</span> <span class="nb">int</span><span class="p">,</span>
+  <span class="n">name</span> <span class="nb">text</span><span 
class="p">,</span>
+  <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+  <span class="k">type</span> <span class="nb">text</span>
+<span class="p">)</span> <span class="k">USING</span> <span 
class="n">PARQUET</span><span class="p">;</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="physical-properties">
+<h2>Physical Properties<a class="headerlink" href="#physical-properties" 
title="Permalink to this headline">¶</a></h2>
+<p>Some table storage formats provide parameters for enabling or disabling 
features and adjusting physical parameters.
+The <tt class="docutils literal"><span class="pre">WITH</span></tt> clause in 
the CREATE TABLE statement allows users to set those parameters.</p>
+<p>Currently, the Parquet storage type provides the following physical properties.</p>
+<ul class="simple">
+<li><tt class="docutils literal"><span 
class="pre">parquet.block.size</span></tt>: The block size is the size of a row 
group being buffered in memory. This limits the memory usage when writing. 
Larger values will improve the I/O when reading but consume more memory when 
writing. Default size is 134217728 bytes (= 128 * 1024 * 1024).</li>
+<li><tt class="docutils literal"><span 
class="pre">parquet.page.size</span></tt>: The page size is for compression. 
When reading, each page can be decompressed independently. A block is composed 
of pages. The page is the smallest unit that must be read fully to access a 
single record. If this value is too small, the compression will deteriorate. 
Default size is 1048576 bytes (= 1 * 1024 * 1024).</li>
+<li><tt class="docutils literal"><span 
class="pre">parquet.compression</span></tt>: The compression algorithm used to 
compress pages. It should be one of <tt class="docutils literal"><span 
class="pre">uncompressed</span></tt>, <tt class="docutils literal"><span 
class="pre">snappy</span></tt>, <tt class="docutils literal"><span 
class="pre">gzip</span></tt>, <tt class="docutils literal"><span 
class="pre">lzo</span></tt>. Default is <tt class="docutils literal"><span 
class="pre">uncompressed</span></tt>.</li>
+<li><tt class="docutils literal"><span 
class="pre">parquet.enable.dictionary</span></tt>: The boolean value is to 
enable/disable dictionary encoding. It should be one of either <tt 
class="docutils literal"><span class="pre">true</span></tt> or <tt 
class="docutils literal"><span class="pre">false</span></tt>. Default is <tt 
class="docutils literal"><span class="pre">true</span></tt>.</li>
+</ul>
+</div>
+<div class="section" id="compatibility-issues-with-apache-hive">
+<h2>Compatibility Issues with Apache Hive™<a class="headerlink" 
href="#compatibility-issues-with-apache-hive" title="Permalink to this 
headline">¶</a></h2>
+<p>At the moment, Tajo only supports flat relational tables.
+As a result, Tajo&#8217;s Parquet storage type does not support nested schemas.
+However, we are currently working on adding support for nested schemas and 
non-scalar types (<a class="reference external" 
href="https://issues.apache.org/jira/browse/TAJO-710">TAJO-710</a>).</p>
+</div>
+</div>
+
+
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer 
navigation">
+      
+        <a href="sequencefile.html" class="btn btn-neutral float-right" 
title="SequenceFile">Next <span class="fa fa-arrow-circle-right"></span></a>
+      
+      
+        <a href="rcfile.html" class="btn btn-neutral" title="RCFile"><span 
class="fa fa-arrow-circle-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <p>
+        &copy; Copyright 2014, Apache Tajo Team.
+    </p>
+  </div>
+
+  <a href="https://github.com/snide/sphinx_rtd_theme">Sphinx theme</a> 
provided by <a href="https://readthedocs.org">Read the Docs</a>
+</footer>
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+  
+
+
+  
+
+    <script type="text/javascript">
+        var DOCUMENTATION_OPTIONS = {
+            URL_ROOT:'../',
+            VERSION:'0.8.0',
+            COLLAPSE_INDEX:false,
+            FILE_SUFFIX:'.html',
+            HAS_SOURCE:  true
+        };
+    </script>
+      <script type="text/javascript" src="../_static/jquery.js"></script>
+      <script type="text/javascript" src="../_static/underscore.js"></script>
+      <script type="text/javascript" src="../_static/doctools.js"></script>
+
+  
+
+  
+  
+    <script type="text/javascript" src="../_static/js/theme.js"></script>
+  
+
+  
+  
+  <script type="text/javascript">
+      jQuery(function () {
+          SphinxRtdTheme.StickyNav.enable();
+      });
+  </script>
+   
+
+</body>
+</html>
\ No newline at end of file

Added: tajo/site/docs/devel/table_management/rcfile.html
URL: 
http://svn.apache.org/viewvc/tajo/site/docs/devel/table_management/rcfile.html?rev=1644656&view=auto
==============================================================================
--- tajo/site/docs/devel/table_management/rcfile.html (added)
+++ tajo/site/docs/devel/table_management/rcfile.html Thu Dec 11 14:41:20 2014
@@ -0,0 +1,362 @@
+
+
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  <title>RCFile &mdash; Apache Tajo 0.8.0 documentation</title>
+  
+
+  
+  
+
+  
+  <link 
href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700'
 rel='stylesheet' type='text/css'>
+
+  
+  
+    
+
+  
+
+  
+  
+    <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
+  
+
+  
+    <link rel="top" title="Apache Tajo 0.8.0 documentation" 
href="../index.html"/>
+        <link rel="up" title="File Formats" href="file_formats.html"/>
+        <link rel="next" title="Parquet" href="parquet.html"/>
+        <link rel="prev" title="CSV (TextFile)" href="csv.html"/> 
+
+  
+  <script 
src="https://cdnjs.cloudflare.com/ajax/libs/modernizr/2.6.2/modernizr.min.js"></script>
+
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-nav-search">
+        <a href="../index.html" class="fa fa-home"> Apache Tajo</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../search.html" 
method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation">
+        
+        
+            <ul class="current">
+<li class="toctree-l1"><a class="reference internal" 
href="../introduction.html">Introduction</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../getting_started.html">Getting Started</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/prerequisites.html">Prerequisites</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/downloading_source.html">Download and unpack the source 
code</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/building.html">Build source code</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/local_setup.html">Setting up a local Tajo 
cluster</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../getting_started/first_query.html">First query execution</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../configuration.html">Configuration</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/preliminary.html">Preliminary</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/cluster_setup.html">Cluster Setup</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/tajo_master_configuration.html">Tajo Master 
Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/worker_configuration.html">Worker Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/catalog_configuration.html">Catalog 
Configuration</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/configuration_defaults.html">Configuration 
Defaults</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../configuration/ha_configuration.html">High Availability for 
TajoMaster</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="../tsql.html">Tajo 
Shell (TSQL)</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/meta_command.html">Meta Commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/dfs_command.html">Executing HDFS commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/variables.html">Session Variables</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/admin_command.html">Administration Commands</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/intro.html">Introduction to TSQL</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/single_command.html">Executing a single command</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/execute_file.html">Executing Queries from Files</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../tsql/background_command.html">Executing as background process</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../sql_language.html">SQL Language</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/data_model.html">Data Model</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/ddl.html">Data Definition Language</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/insert.html">INSERT (OVERWRITE) INTO</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/queries.html">Queries</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/sql_expression.html">SQL Expressions</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../sql_language/predicates.html">Predicates</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../functions.html">Functions</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/math_func_and_operators.html">Math Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/string_func_and_operators.html">String Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/datetime_func_and_operators.html">DateTime Functions and 
Operators</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../functions/network_func_and_operators.html">Network Functions and 
Operators</a></li>
+</ul>
+</li>
+<li class="toctree-l1 current"><a class="reference internal" 
href="../table_management.html">Table Management</a><ul class="current">
+<li class="toctree-l2"><a class="reference internal" 
href="table_overview.html">Overview of Tajo Tables</a></li>
+<li class="toctree-l2 current"><a class="reference internal" 
href="file_formats.html">File Formats</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="compression.html">Compression</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../table_partitioning.html">Table Partitioning</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/intro_to_partitioning.html">Introduction to 
Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/column_partitioning.html">Column Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/range_partitioning.html">Range Partitioning</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../partitioning/hash_partitioning.html">Hash Partitioning</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../backup_and_restore.html">Backup and Restore</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../backup_and_restore/catalog.html">Backup and Restore Catalog</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../hcatalog_integration.html">HCatalog Integration</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../jdbc_driver.html">Tajo JDBC Driver</a><ul>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#how-to-get-jdbc-driver">How to get JDBC 
driver</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#setting-the-classpath">Setting the CLASSPATH</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#an-example-jdbc-client">An Example JDBC 
Client</a></li>
+<li class="toctree-l2"><a class="reference internal" 
href="../jdbc_driver.html#faq">FAQ</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" 
href="../tajo_client_api.html">Tajo Client API</a></li>
+<li class="toctree-l1"><a class="reference internal" 
href="../faq.html">FAQ</a></li>
+</ul>
+
+        
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../index.html">Apache Tajo</a>
+      </nav>
+
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../index.html">Docs</a> &raquo;</li>
+      
+          <li><a href="../table_management.html">Table Management</a> 
&raquo;</li>
+      
+          <li><a href="file_formats.html">File Formats</a> &raquo;</li>
+      
+    <li>RCFile</li>
+      <li class="wy-breadcrumbs-aside">
+        
+          <a href="../_sources/table_management/rcfile.txt" rel="nofollow"> 
View page source</a>
+        
+      </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            
+  <div class="section" id="rcfile">
+<h1>RCFile<a class="headerlink" href="#rcfile" title="Permalink to this 
headline">¶</a></h1>
+<p>RCFile, short for Record Columnar File, is a flat file format consisting of binary 
key/value pairs,
+and it shares many similarities with SequenceFile.</p>
+<div class="section" id="how-to-create-a-rcfile-table">
+<h2>How to Create a RCFile Table?<a class="headerlink" 
href="#how-to-create-a-rcfile-table" title="Permalink to this 
headline">¶</a></h2>
+<p>If you are not familiar with the <tt class="docutils literal"><span 
class="pre">CREATE</span> <span class="pre">TABLE</span></tt> statement, please 
refer to <a class="reference internal" 
href="../sql_language/ddl.html"><em>Data Definition Language</em></a>.</p>
+<p>In order to specify a certain file format for your table, you need to use 
the <tt class="docutils literal"><span class="pre">USING</span></tt> clause in 
your <tt class="docutils literal"><span class="pre">CREATE</span> <span 
class="pre">TABLE</span></tt>
+statement. Below is an example statement for creating a table using RCFile.</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">TABLE</span> <span 
class="n">table1</span> <span class="p">(</span>
+  <span class="n">id</span> <span class="nb">int</span><span class="p">,</span>
+  <span class="n">name</span> <span class="nb">text</span><span 
class="p">,</span>
+  <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+  <span class="k">type</span> <span class="nb">text</span>
+<span class="p">)</span> <span class="k">USING</span> <span 
class="n">RCFILE</span><span class="p">;</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="physical-properties">
+<h2>Physical Properties<a class="headerlink" href="#physical-properties" 
title="Permalink to this headline">¶</a></h2>
+<p>Some table storage formats provide parameters for enabling or disabling 
features and adjusting physical parameters.
+The <tt class="docutils literal"><span class="pre">WITH</span></tt> clause in 
the CREATE TABLE statement allows users to set those parameters.</p>
+<p>Now, the RCFile storage type provides the following physical properties.</p>
+<ul class="simple">
+<li><tt class="docutils literal"><span class="pre">rcfile.serde</span></tt> : 
custom (De)serializer class. <tt class="docutils literal"><span 
class="pre">org.apache.tajo.storage.BinarySerializerDeserializer</span></tt> is 
the default (de)serializer class.</li>
+<li><tt class="docutils literal"><span class="pre">rcfile.null</span></tt> : 
NULL character. It is only used when a table uses <tt class="docutils 
literal"><span 
class="pre">org.apache.tajo.storage.TextSerializerDeserializer</span></tt>. The 
default NULL character is an empty string <tt class="docutils literal"><span 
class="pre">''</span></tt>. Hive&#8217;s default NULL character is <tt 
class="docutils literal"><span class="pre">'\\N'</span></tt>.</li>
+<li><tt class="docutils literal"><span 
class="pre">compression.codec</span></tt> : Compression codec. Setting this 
property enables compression and selects the algorithm used to compress 
files. The compression codec name should be the 
fully qualified name of a class that implements <a class="reference external" 
href="https://hadoop.apache.org/docs/current/api/org/apache/hadoop/io/compress/CompressionCodec.html">org.apache.hadoop.io.compress.CompressionCodec</a>.
 By default, compression is disabled.</li>
+</ul>
+<p>The following is an example for creating a table using RCFile that uses 
compression.</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">TABLE</span> <span 
class="n">table1</span> <span class="p">(</span>
+  <span class="n">id</span> <span class="nb">int</span><span class="p">,</span>
+  <span class="n">name</span> <span class="nb">text</span><span 
class="p">,</span>
+  <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+  <span class="k">type</span> <span class="nb">text</span>
+<span class="p">)</span> <span class="k">USING</span> <span 
class="n">RCFILE</span> <span class="k">WITH</span> <span 
class="p">(</span><span class="s1">&#39;compression.codec&#39;</span><span 
class="o">=</span><span 
class="s1">&#39;org.apache.hadoop.io.compress.SnappyCodec&#39;</span><span 
class="p">);</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="rcfile-de-serializers">
+<h2>RCFile (De)serializers<a class="headerlink" href="#rcfile-de-serializers" 
title="Permalink to this headline">¶</a></h2>
+<p>Tajo provides two built-in (de)serializers for RCFile:</p>
+<ul class="simple">
+<li><tt class="docutils literal"><span 
class="pre">org.apache.tajo.storage.TextSerializerDeserializer</span></tt>: 
stores column values in a plain-text form.</li>
+<li><tt class="docutils literal"><span 
class="pre">org.apache.tajo.storage.BinarySerializerDeserializer</span></tt>: 
stores column values in a binary file format.</li>
+</ul>
+<p>The RCFile format can store some metadata in the RCFile header. Tajo writes 
the (de)serializer class name into
+the metadata header of each RCFile when the RCFile is created in Tajo.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last"><tt class="docutils literal"><span 
class="pre">org.apache.tajo.storage.BinarySerializerDeserializer</span></tt> is 
the default (de)serializer for RCFile.</p>
+</div>
+</div>
+<div class="section" id="compatibility-issues-with-apache-hive">
+<h2>Compatibility Issues with Apache Hive™<a class="headerlink" 
href="#compatibility-issues-with-apache-hive" title="Permalink to this 
headline">¶</a></h2>
+<p>Regardless of whether the RCFiles are written by Apache Hive™ or Apache 
Tajo™, the files are compatible in both systems.
+In other words, Tajo can process RCFiles written by Apache Hive and vice 
versa.</p>
+<p>Since there is no metadata in RCFiles written by Hive, we need to manually 
specify the (de)serializer class name
+by setting a physical property.</p>
+<p>In Hive, there are two SerDes, and they correspond to the following 
(de)serializers in Tajo.</p>
+<ul class="simple">
+<li><tt class="docutils literal"><span 
class="pre">org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</span></tt>: 
corresponds to <tt class="docutils literal"><span 
class="pre">TextSerializerDeserializer</span></tt> in Tajo.</li>
+<li><tt class="docutils literal"><span 
class="pre">org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe</span></tt>:
 corresponds to <tt class="docutils literal"><span 
class="pre">BinarySerializerDeserializer</span></tt> in Tajo.</li>
+</ul>
+<p>The compatibility issue mostly occurs when a user creates an external table 
pointing to data of an existing table.
+The following section explains two cases: 1) the case where Tajo reads RCFile 
written by Hive, and
+2) the case where Hive reads RCFile written by Tajo.</p>
+<div class="section" id="when-tajo-reads-rcfile-generated-in-hive">
+<h3>When Tajo reads RCFile generated in Hive<a class="headerlink" 
href="#when-tajo-reads-rcfile-generated-in-hive" title="Permalink to this 
headline">¶</a></h3>
+<p>To create an external RCFile table generated with <tt class="docutils 
literal"><span class="pre">ColumnarSerDe</span></tt> in Hive,
+you should set the physical property <tt class="docutils literal"><span 
class="pre">rcfile.serde</span></tt> in Tajo as follows:</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">EXTERNAL</span> <span 
class="k">TABLE</span> <span class="n">table1</span> <span class="p">(</span>
+  <span class="n">id</span> <span class="nb">int</span><span class="p">,</span>
+  <span class="n">name</span> <span class="nb">text</span><span 
class="p">,</span>
+  <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+  <span class="k">type</span> <span class="nb">text</span>
+<span class="p">)</span> <span class="k">USING</span> <span 
class="n">RCFILE</span> <span class="k">with</span> <span class="p">(</span> 
<span class="s1">&#39;rcfile.serde&#39;</span><span class="o">=</span><span 
class="s1">&#39;org.apache.tajo.storage.TextSerializerDeserializer&#39;</span><span
 class="p">,</span> <span class="s1">&#39;rcfile.null&#39;</span><span 
class="o">=</span><span class="s1">&#39;\\N&#39;</span> <span class="p">)</span>
+<span class="k">LOCATION</span> <span class="s1">&#39;....&#39;</span><span 
class="p">;</span>
+</pre></div>
+</div>
+<p>To create an external RCFile table generated with <tt class="docutils 
literal"><span class="pre">LazyBinaryColumnarSerDe</span></tt> in Hive,
+you should set the physical property <tt class="docutils literal"><span 
class="pre">rcfile.serde</span></tt> in Tajo as follows:</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">EXTERNAL</span> <span 
class="k">TABLE</span> <span class="n">table1</span> <span class="p">(</span>
+  <span class="n">id</span> <span class="nb">int</span><span class="p">,</span>
+  <span class="n">name</span> <span class="nb">text</span><span 
class="p">,</span>
+  <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+  <span class="k">type</span> <span class="nb">text</span>
+<span class="p">)</span> <span class="k">USING</span> <span 
class="n">RCFILE</span> <span class="k">WITH</span> <span 
class="p">(</span><span class="s1">&#39;rcfile.serde&#39;</span> <span 
class="o">=</span> <span 
class="s1">&#39;org.apache.tajo.storage.BinarySerializerDeserializer&#39;</span><span
 class="p">)</span>
+<span class="k">LOCATION</span> <span class="s1">&#39;....&#39;</span><span 
class="p">;</span>
+</pre></div>
+</div>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">As we mentioned above, <tt class="docutils literal"><span 
class="pre">BinarySerializerDeserializer</span></tt> is the default 
(de)serializer for RCFile.
+So, you can omit the <tt class="docutils literal"><span 
class="pre">rcfile.serde</span></tt> only for <tt class="docutils 
literal"><span 
class="pre">org.apache.tajo.storage.BinarySerializerDeserializer</span></tt>.</p>
+</div>
+</div>
+<div class="section" id="when-hive-reads-rcfile-generated-in-tajo">
+<h3>When Hive reads RCFile generated in Tajo<a class="headerlink" 
href="#when-hive-reads-rcfile-generated-in-tajo" title="Permalink to this 
headline">¶</a></h3>
+<p>To create an external RCFile table written by Tajo with <tt class="docutils 
literal"><span class="pre">TextSerializerDeserializer</span></tt>,
+you should set the <tt class="docutils literal"><span 
class="pre">SERDE</span></tt> as follows:</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">EXTERNAL</span> <span 
class="k">TABLE</span> <span class="n">table1</span> <span class="p">(</span>
+  <span class="n">id</span> <span class="nb">int</span><span class="p">,</span>
+  <span class="n">name</span> <span class="n">string</span><span 
class="p">,</span>
+  <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+  <span class="k">type</span> <span class="n">string</span>
+<span class="p">)</span> <span class="k">ROW</span> <span 
class="n">FORMAT</span> <span class="n">SERDE</span> <span 
class="s1">&#39;org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe&#39;</span>
 <span class="n">STORED</span> <span class="k">AS</span> <span 
class="n">RCFILE</span>
+<span class="k">LOCATION</span> <span 
class="s1">&#39;&lt;hdfs_location&gt;&#39;</span><span class="p">;</span>
+</pre></div>
+</div>
+<p>To create an external RCFile table written by Tajo with <tt class="docutils 
literal"><span class="pre">BinarySerializerDeserializer</span></tt>,
+you should set the <tt class="docutils literal"><span 
class="pre">SERDE</span></tt> as follows:</p>
+<div class="highlight-sql"><div class="highlight"><pre><span 
class="k">CREATE</span> <span class="k">EXTERNAL</span> <span 
class="k">TABLE</span> <span class="n">table1</span> <span class="p">(</span>
+  <span class="n">id</span> <span class="nb">int</span><span class="p">,</span>
+  <span class="n">name</span> <span class="n">string</span><span 
class="p">,</span>
+  <span class="n">score</span> <span class="nb">float</span><span 
class="p">,</span>
+  <span class="k">type</span> <span class="n">string</span>
+<span class="p">)</span> <span class="k">ROW</span> <span 
class="n">FORMAT</span> <span class="n">SERDE</span> <span 
class="s1">&#39;org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe&#39;</span>
 <span class="n">STORED</span> <span class="k">AS</span> <span 
class="n">RCFILE</span>
+<span class="k">LOCATION</span> <span 
class="s1">&#39;&lt;hdfs_location&gt;&#39;</span><span class="p">;</span>
+</pre></div>
+</div>
+</div>
+</div>
+</div>
+
+
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer 
navigation">
+      
+        <a href="parquet.html" class="btn btn-neutral float-right" 
title="Parquet">Next <span class="fa fa-arrow-circle-right"></span></a>
+      
+      
+        <a href="csv.html" class="btn btn-neutral" title="CSV 
(TextFile)"><span class="fa fa-arrow-circle-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <p>
+        &copy; Copyright 2014, Apache Tajo Team.
+    </p>
+  </div>
+
+  <a href="https://github.com/snide/sphinx_rtd_theme">Sphinx theme</a> 
provided by <a href="https://readthedocs.org">Read the Docs</a>
+</footer>
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+  
+
+
+  
+
+    <script type="text/javascript">
+        var DOCUMENTATION_OPTIONS = {
+            URL_ROOT:'../',
+            VERSION:'0.8.0',
+            COLLAPSE_INDEX:false,
+            FILE_SUFFIX:'.html',
+            HAS_SOURCE:  true
+        };
+    </script>
+      <script type="text/javascript" src="../_static/jquery.js"></script>
+      <script type="text/javascript" src="../_static/underscore.js"></script>
+      <script type="text/javascript" src="../_static/doctools.js"></script>
+
+  
+
+  
+  
+    <script type="text/javascript" src="../_static/js/theme.js"></script>
+  
+
+  
+  
+  <script type="text/javascript">
+      jQuery(function () {
+          SphinxRtdTheme.StickyNav.enable();
+      });
+  </script>
+   
+
+</body>
+</html>
\ No newline at end of file


Reply via email to