Repository: tajo
Updated Branches:
  refs/heads/master 8457d85a3 -> 149a44d85


TAJO-1857: Rename the section of 'File Formats' to 'Data Formats' and fill 
compression section of the 'Table Management' chapter.

Signed-off-by: Jihoon Son <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/149a44d8
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/149a44d8
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/149a44d8

Branch: refs/heads/master
Commit: 149a44d85c5bcd7677dfd5db3a3d39644a4c8194
Parents: 8457d85
Author: Jongyoung Park <[email protected]>
Authored: Thu Nov 26 09:40:56 2015 +0900
Committer: Jihoon Son <[email protected]>
Committed: Thu Nov 26 09:41:20 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |  4 ++++
 .../java/org/apache/tajo/conf/TajoConf.java     |  2 +-
 .../planner/physical/PhysicalPlanUtil.java      |  6 ++---
 tajo-docs/src/main/sphinx/table_management.rst  |  8 +++----
 .../sphinx/table_management/compression.rst     | 24 +++++++++++++++++---
 .../sphinx/table_management/data_formats.rst    | 15 ++++++++++++
 .../sphinx/table_management/file_formats.rst    | 15 ------------
 .../sphinx/table_management/table_overview.rst  | 17 +++++++++-----
 .../sphinx/table_management/tablespaces.rst     |  4 ++--
 .../apache/tajo/storage/StorageProperty.java    |  6 ++---
 10 files changed, 64 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index a401989..653dc6f 100644
--- a/CHANGES
+++ b/CHANGES
@@ -86,6 +86,10 @@ Release 0.12.0 - unreleased
 
   TASKS
 
+    TAJO-1857: Rename the section of 'File Formats' to 'Data Formats' and fill 
+    compression section of the 'Table Management' chapter. 
+    (Contributed by Jongyoung Park. Committed by jihoon)
+
     TAJO-1975: Gathering fine-grained column statistics for range shuffle. 
(jihoon)
 
     TAJO-1963: Add more configuration descriptions to document. (jihoon)

http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java 
b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
index a2c1fb8..9f788eb 100644
--- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
+++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
@@ -368,7 +368,7 @@ public class TajoConf extends Configuration {
     $TIMEZONE("tajo.timezone", TimeZone.getDefault().getID()),
     $DATE_ORDER("tajo.datetime.date-order", "YMD"),
 
-    // FILE FORMAT
+    // null character for text file output
     $TEXT_NULL("tajo.text.null", "\\\\N"),
 
     // Only for Debug and Testing

http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
----------------------------------------------------------------------
diff --git 
a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
 
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
index baa0bf6..d1dfe40 100644
--- 
a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
+++ 
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
@@ -203,7 +203,7 @@ public class PhysicalPlanUtil {
   }
 
   /**
-   * Set nullChar to TableMeta according to file format
+   * Set nullChar to TableMeta according to data format
    *
    * @param meta TableMeta
    * @param nullChar A character for NULL representation
@@ -220,10 +220,10 @@ public class PhysicalPlanUtil {
   }
 
   /**
-   * Check if TableMeta contains NULL char property according to file format
+   * Check if TableMeta contains NULL char property according to data format
    *
    * @param meta Table Meta
-   * @return True if TableMeta contains NULL char property according to file 
format
+   * @return True if TableMeta contains NULL char property according to data 
format
    */
   public static boolean containsNullChar(TableMeta meta) {
     String dataFormat = meta.getDataFormat();

http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management.rst 
b/tajo-docs/src/main/sphinx/table_management.rst
index 5a4693e..dd7b9ad 100644
--- a/tajo-docs/src/main/sphinx/table_management.rst
+++ b/tajo-docs/src/main/sphinx/table_management.rst
@@ -1,6 +1,6 @@
-******************
+****************
 Table Management
-******************
+****************
 
 In Tajo, a table is a logical view of one data source. Logically, one table 
consists of a logical schema, partitions, URL, and various properties. 
Physically, a table can be a directory in HDFS, a single file, one HBase table, 
or an RDBMS table. In order to make good use of Tajo, users need to understand 
features and physical characteristics of their physical layout. This section 
explains all about table management.
 
@@ -9,5 +9,5 @@ In Tajo, a table is a logical view of one data sources. 
Logically, one table con
 
     table_management/table_overview
     table_management/tablespaces
-    table_management/file_formats
-    table_management/compression
\ No newline at end of file
+    table_management/data_formats
+    table_management/compression

http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/compression.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/compression.rst 
b/tajo-docs/src/main/sphinx/table_management/compression.rst
index 3d03ba8..67dd255 100644
--- a/tajo-docs/src/main/sphinx/table_management/compression.rst
+++ b/tajo-docs/src/main/sphinx/table_management/compression.rst
@@ -1,5 +1,23 @@
-*********************************
+***********
 Compression
-*********************************
+***********
 
-.. todo::
\ No newline at end of file
+Using compression can make data size compact, thereby enabling efficient use 
of network bandwidth and storage. Most of Tajo's data formats support data 
compression.
+Currently, compression configuration affects only the stored data format, and 
it is specified as table meta information when a table is created (see `Create 
Table <../sql_language/ddl.html#create-table>`_).
+Compression for intermediate data or others is not supported now.
+
+===========================================
+Compression Properties for each Data Format
+===========================================
+
+ .. csv-table:: Compression Properties
+
+  **Data Format**,**Property Name**,**Available Values**
+  
:doc:`text</table_management/text>`/:doc:`json</table_management/json>`/:doc:`rcfile</table_management/rcfile>`/:doc:`sequencefile</table_management/sequencefile>`
 [#f1]_,compression.codec,Fully Qualified Classname in Hadoop [#f2]_
+  
:doc:`parquet</table_management/parquet>`,parquet.compression,uncompressed/snappy/gzip/lzo
+  :doc:`orc</table_management/orc>`,orc.compression.kind,none/snappy/zlib
+
+.. rubric:: Footnotes
+
+.. [#f1] For sequence file, you should specify 'compression.type' in addition 
to 'compression.codec'. Refer to :doc:`/table_management/sequencefile`.
+.. [#f2] All classes are available if they implement 
`org.apache.hadoop.io.compress.CompressionCodec 
<https://hadoop.apache.org/docs/current/api/org/apache/hadoop/io/compress/CompressionCodec.html>`_.

http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/data_formats.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/data_formats.rst 
b/tajo-docs/src/main/sphinx/table_management/data_formats.rst
new file mode 100644
index 0000000..b17e3f3
--- /dev/null
+++ b/tajo-docs/src/main/sphinx/table_management/data_formats.rst
@@ -0,0 +1,15 @@
+************
+Data Formats
+************
+
+Currently, Tajo provides the following data formats:
+
+.. toctree::
+    :maxdepth: 1
+
+    text
+    json
+    rcfile
+    parquet
+    orc
+    sequencefile
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/file_formats.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/file_formats.rst 
b/tajo-docs/src/main/sphinx/table_management/file_formats.rst
deleted file mode 100644
index 966903c..0000000
--- a/tajo-docs/src/main/sphinx/table_management/file_formats.rst
+++ /dev/null
@@ -1,15 +0,0 @@
-*************************************
-File Formats
-*************************************
-
-Currently, Tajo provides following file formats:
-
-.. toctree::
-    :maxdepth: 1
-
-    text
-    json
-    rcfile
-    parquet
-    orc
-    sequencefile
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/table_overview.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/table_overview.rst 
b/tajo-docs/src/main/sphinx/table_management/table_overview.rst
index d030b4a..7ecf53b 100644
--- a/tajo-docs/src/main/sphinx/table_management/table_overview.rst
+++ b/tajo-docs/src/main/sphinx/table_management/table_overview.rst
@@ -1,10 +1,12 @@
-*************************************
+***********************
 Overview of Tajo Tables
-*************************************
+***********************
 
+========
 Overview
 ========
 
+===========
 Tablespaces
 ===========
 
@@ -12,8 +14,9 @@ Tablespaces is a physical location where files or data 
objects representing data
 
 Please refer to :doc:`/table_management/tablespaces` if you want to know more 
information about tablespaces.
 
+=============
 Managed Table
-================
+=============
 
 ``CREATE TABLE`` statement lets you create a table located in the warehouse 
directory specified by the configuration property ``tajo.warehouse.directory`` 
or ``${tajo.root}/warehouse`` by default. For example:
 
@@ -26,8 +29,9 @@ Managed Table
  );
 
 
+==============
 External Table
-================
+==============
 
 ``CREATE EXTERNAL TABLE`` statement lets you create a table located in a 
specified location so that Tajo does not use a default data warehouse location 
for the table. External tables are commonly used if you already have data 
generated. A LOCATION clause is required for an external table. 
 
@@ -65,7 +69,7 @@ The following example is to set a custom field delimiter, 
NULL character, and co
                    'text.null'='\\N',
                    
'compression.codec'='org.apache.hadoop.io.compress.SnappyCodec');
 
-Each physical table layout has its own specialized properties. They will be 
addressed in :doc:`/table_management/file_formats`.
+Each physical table layout has its own specialized properties. They will be 
addressed in :doc:`/table_management/data_formats`.
 
 
 Common Table Properties
@@ -75,7 +79,8 @@ There are some common table properties which are used in most 
tables.
 
 Compression
 -----------
-.. todo::
+
+See :doc:`compression`.
 
 Time zone
 ---------

http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/tablespaces.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/tablespaces.rst 
b/tajo-docs/src/main/sphinx/table_management/tablespaces.rst
index 79ea65f..9316154 100644
--- a/tajo-docs/src/main/sphinx/table_management/tablespaces.rst
+++ b/tajo-docs/src/main/sphinx/table_management/tablespaces.rst
@@ -1,6 +1,6 @@
-*************************************
+***********
 Tablespaces
-*************************************
+***********
 
 Tablespaces in Tajo allow users to define locations in the storage system 
where the files or data objects representing database objects can be stored. 
Once defined, a tablespace can be referred to by name when creating a database 
or a table. Especially, it is very useful when a Tajo cluster instance should 
use heterogeneous storage systems such as HDFS, MySQL, and Oracle.
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java
----------------------------------------------------------------------
diff --git 
a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java
 
b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java
index 41ecd38..c3dc3e9 100644
--- 
a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java
+++ 
b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java
@@ -22,7 +22,7 @@ package org.apache.tajo.storage;
  * Storage Properties
  */
 public class StorageProperty {
-  /** default file format */
+  /** default data format */
   private final String defaultFormat;
   /** if this storage supports move operator */
   private final boolean movable;
@@ -47,8 +47,8 @@ public class StorageProperty {
   }
 
   /**
-   * Return default file format
-   * @return Default file format
+   * Return default data format
+   * @return Default data format
    */
   public String defaultFormat() {
     return defaultFormat;

Reply via email to