[
https://issues.apache.org/jira/browse/TAJO-2069?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15290693#comment-15290693
]
ASF GitHub Bot commented on TAJO-2069:
--------------------------------------
Github user blrunner commented on the pull request:
https://github.com/apache/tajo/pull/1024#issuecomment-220260771
@jinossy
I generated partitioned tables on HDFS, and then uploaded output files to
S3 with aws sdk, finally created external table on ec2. Here are my test
environment.
* Hadoop version: apache hadoop 2.7.1
* Basic data : TPC-H 1G data set
* CTAS for partitioned table with HDFS
```
CREATE TABLE lineitem_p1 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag
TEXT, l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
AS
SELECT L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY,
L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS,
L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT,
L_SHIPDATE FROM LINEITEM
where l_shipdate < '1992-01-07';
CREATE TABLE lineitem_p2 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag
TEXT, l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
AS SELECT L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY,
L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_COMMITDATE,
L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT, L_SHIPDATE FROM LINEITEM
where l_shipdate < '1993-01-01';
CREATE TABLE lineitem_p3 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag
TEXT, l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
AS SELECT L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY,
L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_COMMITDATE,
L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT, L_SHIPDATE FROM LINEITEM
where l_shipdate < '1994-01-01';
CREATE TABLE lineitem_p4 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag
TEXT, l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
AS SELECT L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY,
L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_COMMITDATE,
L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT, L_SHIPDATE FROM LINEITEM
where l_shipdate < '1995-01-01';
CREATE TABLE lineitem_p5 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag
TEXT, l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
AS SELECT L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY,
L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_COMMITDATE,
L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT, L_SHIPDATE FROM LINEITEM
where l_shipdate < '1996-01-01';
```
* DDL for creating external table with S3
```
CREATE EXTERNAL TABLE lineitem_p1 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag TEXT,
l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
LOCATION 's3://jhjung-us/tpch/lineitem_p1';
CREATE EXTERNAL TABLE lineitem_p2 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag TEXT,
l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
LOCATION 's3://jhjung-us/tpch/lineitem_p2';
CREATE EXTERNAL TABLE lineitem_p3 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag TEXT,
l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
LOCATION 's3://jhjung-us/tpch/lineitem_p3';
CREATE EXTERNAL TABLE lineitem_p4 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag TEXT,
l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
LOCATION 's3://jhjung-us/tpch/lineitem_p4';
CREATE EXTERNAL TABLE lineitem_p5 (
l_orderkey INT8, l_partkey INT8, l_suppkey INT8, l_linenumber INT8,
l_quantity FLOAT8,
l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag TEXT,
l_linestatus TEXT,
l_commitdate DATE, l_receiptdate DATE, l_shipinstruct TEXT, l_shipmode
TEXT, l_comment TEXT
)
USING TEXT WITH ('text.delimiter'='|')
PARTITION BY COLUMN(l_shipdate text)
LOCATION 's3://jhjung-us/tpch/lineitem_p5';
```
> Implement finding the total size of all objects in a bucket with AWS SDK.
> -------------------------------------------------------------------------
>
> Key: TAJO-2069
> URL: https://issues.apache.org/jira/browse/TAJO-2069
> Project: Tajo
> Issue Type: Sub-task
> Components: Catalog, QueryMaster, S3, Storage
> Reporter: Jaehwa Jung
> Assignee: Jaehwa Jung
> Fix For: 0.12.0
>
>
> See the title and TAJO-2023.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)