Repository: tajo Updated Branches: refs/heads/master 8457d85a3 -> 149a44d85
TAJO-1857: Rename the section of 'File Formats' to 'Data Formats' and fill compression section of the 'Table Management' chapter. Signed-off-by: Jihoon Son <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/149a44d8 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/149a44d8 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/149a44d8 Branch: refs/heads/master Commit: 149a44d85c5bcd7677dfd5db3a3d39644a4c8194 Parents: 8457d85 Author: Jongyoung Park <[email protected]> Authored: Thu Nov 26 09:40:56 2015 +0900 Committer: Jihoon Son <[email protected]> Committed: Thu Nov 26 09:41:20 2015 +0900 ---------------------------------------------------------------------- CHANGES | 4 ++++ .../java/org/apache/tajo/conf/TajoConf.java | 2 +- .../planner/physical/PhysicalPlanUtil.java | 6 ++--- tajo-docs/src/main/sphinx/table_management.rst | 8 +++---- .../sphinx/table_management/compression.rst | 24 +++++++++++++++++--- .../sphinx/table_management/data_formats.rst | 15 ++++++++++++ .../sphinx/table_management/file_formats.rst | 15 ------------ .../sphinx/table_management/table_overview.rst | 17 +++++++++----- .../sphinx/table_management/tablespaces.rst | 4 ++-- .../apache/tajo/storage/StorageProperty.java | 6 ++--- 10 files changed, 64 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index a401989..653dc6f 100644 --- a/CHANGES +++ b/CHANGES @@ -86,6 +86,10 @@ Release 0.12.0 - unreleased TASKS + TAJO-1857: Rename the section of 'File Formats' to 'Data Formats' and fill + compression section of the 'Table Management' chapter. + (Contributed by Jongyoung Park. Committed by jihoon) + TAJO-1975: Gathering fine-grained column statistics for range shuffle. 
(jihoon) TAJO-1963: Add more configuration descriptions to document. (jihoon) http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index a2c1fb8..9f788eb 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -368,7 +368,7 @@ public class TajoConf extends Configuration { $TIMEZONE("tajo.timezone", TimeZone.getDefault().getID()), $DATE_ORDER("tajo.datetime.date-order", "YMD"), - // FILE FORMAT + // null character for text file output $TEXT_NULL("tajo.text.null", "\\\\N"), // Only for Debug and Testing http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java index baa0bf6..d1dfe40 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java @@ -203,7 +203,7 @@ public class PhysicalPlanUtil { } /** - * Set nullChar to TableMeta according to file format + * Set nullChar to TableMeta according to data format * * @param meta TableMeta * @param nullChar A character for NULL representation @@ -220,10 +220,10 @@ public class PhysicalPlanUtil { } /** - * Check if TableMeta contains NULL char property according to file format + * Check if TableMeta contains NULL char property according to data format * * @param meta Table Meta - * @return True if 
TableMeta contains NULL char property according to file format + * @return True if TableMeta contains NULL char property according to data format */ public static boolean containsNullChar(TableMeta meta) { String dataFormat = meta.getDataFormat(); http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/table_management.rst b/tajo-docs/src/main/sphinx/table_management.rst index 5a4693e..dd7b9ad 100644 --- a/tajo-docs/src/main/sphinx/table_management.rst +++ b/tajo-docs/src/main/sphinx/table_management.rst @@ -1,6 +1,6 @@ -****************** +**************** Table Management -****************** +**************** In Tajo, a table is a logical view of one data sources. Logically, one table consists of a logical schema, partitions, URL, and various properties. Physically, A table can be a directory in HDFS, a single file, one HBase table, or a RDBMS table. In order to make good use of Tajo, users need to understand features and physical characteristics of their physical layout. This section explains all about table management. @@ -9,5 +9,5 @@ In Tajo, a table is a logical view of one data sources. 
Logically, one table con table_management/table_overview table_management/tablespaces - table_management/file_formats - table_management/compression \ No newline at end of file + table_management/data_formats + table_management/compression http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/compression.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/table_management/compression.rst b/tajo-docs/src/main/sphinx/table_management/compression.rst index 3d03ba8..67dd255 100644 --- a/tajo-docs/src/main/sphinx/table_management/compression.rst +++ b/tajo-docs/src/main/sphinx/table_management/compression.rst @@ -1,5 +1,23 @@ -********************************* +*********** Compression -********************************* +*********** -.. todo:: \ No newline at end of file +Using compression can make data size compact, thereby enabling efficient use of network bandwidth and storage. Most of Tajo data formats support data compression feature. +Currently, compression configuration affects only for stored data format and it is specified when a table is created as table meta information(See `Create Table <../sql_language/ddl.html#create-table>`_). +Compression for intermediate data or others is not supported now. + +=========================================== +Compression Properties for each Data Format +=========================================== + + .. csv-table:: Compression Properties + + **Data Format**,**Property Name**,**Available Values** + :doc:`text</table_management/text>`/:doc:`json</table_management/json>`/:doc:`rcfile</table_management/rcfile>`/:doc:`sequencefile</table_management/sequencefile>` [#f1]_,compression.codec,Fully Qualified Classname in Hadoop [#f2]_ + :doc:`parquet</table_management/parquet>`,parquet.compression,uncompressed/snappy/gzip/lzo + :doc:`orc</table_management/orc>`,orc.compression.kind,none/snappy/zlib + +.. rubric:: Footnotes + +.. 
[#f1] For sequence file, you should specify 'compression.type' in addition to 'compression.codec'. Refer to :doc:`/table_management/sequencefile`. +.. [#f2] All classes are available if they implement `org.apache.hadoop.io.compress.CompressionCodec <https://hadoop.apache.org/docs/current/api/org/apache/hadoop/io/compress/CompressionCodec.html>`_. http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/data_formats.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/table_management/data_formats.rst b/tajo-docs/src/main/sphinx/table_management/data_formats.rst new file mode 100644 index 0000000..b17e3f3 --- /dev/null +++ b/tajo-docs/src/main/sphinx/table_management/data_formats.rst @@ -0,0 +1,15 @@ +************ +Data Formats +************ + +Currently, Tajo provides following data formats: + +.. toctree:: + :maxdepth: 1 + + text + json + rcfile + parquet + orc + sequencefile \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/file_formats.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/table_management/file_formats.rst b/tajo-docs/src/main/sphinx/table_management/file_formats.rst deleted file mode 100644 index 966903c..0000000 --- a/tajo-docs/src/main/sphinx/table_management/file_formats.rst +++ /dev/null @@ -1,15 +0,0 @@ -************************************* -File Formats -************************************* - -Currently, Tajo provides following file formats: - -.. 
toctree:: - :maxdepth: 1 - - text - json - rcfile - parquet - orc - sequencefile \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/table_overview.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/table_management/table_overview.rst b/tajo-docs/src/main/sphinx/table_management/table_overview.rst index d030b4a..7ecf53b 100644 --- a/tajo-docs/src/main/sphinx/table_management/table_overview.rst +++ b/tajo-docs/src/main/sphinx/table_management/table_overview.rst @@ -1,10 +1,12 @@ -************************************* +*********************** Overview of Tajo Tables -************************************* +*********************** +======== Overview ======== +=========== Tablespaces =========== @@ -12,8 +14,9 @@ Tablespaces is a physical location where files or data objects representing data Please refer to :doc:`/table_management/tablespaces` if you want to know more information about tablespaces. +============= Managed Table -================ +============= ``CREATE TABLE`` statement lets you create a table located in the warehouse directory specified by the configuration property ``tajo.warehouse.directory`` or ``${tajo.root}/warehouse`` by default. For example: @@ -26,8 +29,9 @@ Managed Table ); +============== External Table -================ +============== ``CREATE EXTERNAL TABLE`` statement lets you create a table located in a specify location so that Tajo does not use a default data warehouse location for the table. External tables are in common used if you already have data generated. LOCATION clause must be required for an external table. @@ -65,7 +69,7 @@ The following example is to set a custom field delimiter, NULL character, and co 'text.null'='\\N', 'compression.codec'='org.apache.hadoop.io.compress.SnappyCodec'); -Each physical table layout has its own specialized properties. 
They will be addressed in :doc:`/table_management/file_formats`. +Each physical table layout has its own specialized properties. They will be addressed in :doc:`/table_management/data_formats`. Common Table Properties @@ -75,7 +79,8 @@ There are some common table properties which are used in most tables. Compression ----------- -.. todo:: + +See :doc:`compression`. Time zone --------- http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-docs/src/main/sphinx/table_management/tablespaces.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/table_management/tablespaces.rst b/tajo-docs/src/main/sphinx/table_management/tablespaces.rst index 79ea65f..9316154 100644 --- a/tajo-docs/src/main/sphinx/table_management/tablespaces.rst +++ b/tajo-docs/src/main/sphinx/table_management/tablespaces.rst @@ -1,6 +1,6 @@ -************************************* +*********** Tablespaces -************************************* +*********** Tablespaces in Tajo allow users to define locations in the storage system where the files or data objects representing database objects can be stored. Once defined, a tablespace can be referred to by name when creating a database or a table. Especially, it is very useful when a Tajo cluster instance should use heterogeneous storage systems such as HDFS, MySQL, and Oracle. 
http://git-wip-us.apache.org/repos/asf/tajo/blob/149a44d8/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java index 41ecd38..c3dc3e9 100644 --- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java +++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java @@ -22,7 +22,7 @@ package org.apache.tajo.storage; * Storage Properties */ public class StorageProperty { - /** default file format */ + /** default data format */ private final String defaultFormat; /** if this storage supports move operator */ private final boolean movable; @@ -47,8 +47,8 @@ public class StorageProperty { } /** - * Return default file format - * @return Default file format + * Return default data format + * @return Default data format */ public String defaultFormat() { return defaultFormat;
