[ 
https://issues.apache.org/jira/browse/TAJO-1144?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14285198#comment-14285198
 ] 

Jaehwa Jung commented on TAJO-1144:
-----------------------------------

Hi  [~hyunsik]

This issue looks like it has already been fixed. Currently, 
StoreTableExec::openNewFile creates an output schema according to the logical 
plan. Also, Tajo wrote multiple output files according to the given maximum 
file size on my testing cluster, as follows:

{code:xml}
default> \set MAX_OUTPUT_FILE_SIZE 20

default> select count(*) from airline_delay_raw;
?count
-------------------------------
1311826

default> insert overwrite into airline_delay_temp select * from 
airline_delay_raw;

default> select count(*) from airline_delay_temp;
?count
-------------------------------
1311826

default> \dfs -ls /tajo/warehouse/default/airline_delay_temp
Found 6 items
-rw-r--r--   1 blrunner supergroup   20971588 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000
-rw-r--r--   1 blrunner supergroup   20971574 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_1
-rw-r--r--   1 blrunner supergroup   20971563 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_2
-rw-r--r--   1 blrunner supergroup   20971526 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_3
-rw-r--r--   1 blrunner supergroup   20971579 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_4
-rw-r--r--   1 blrunner supergroup   14212086 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_5

default> insert into airline_delay_temp select * from airline_delay_raw;

default> select count(*) from airline_delay_temp;
?count
-------------------------------
2623652

default> \dfs -ls /tajo/warehouse/default/airline_delay_temp
Found 12 items
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_1
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_2
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_3
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_4
-rw-r--r--   1 blrunner supergroup     13.6 M 2015-01-21 12:17 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-000_5
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:21 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-001
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:21 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-002
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:21 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-003
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:21 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-004
-rw-r--r--   1 blrunner supergroup     20.0 M 2015-01-21 12:21 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-005
-rw-r--r--   1 blrunner supergroup     13.6 M 2015-01-21 12:21 
/tajo/warehouse/default/airline_delay_temp/part-01-000000-006

{code} 

> INSERT INTO with output rotation may overwrite previous written output file.
> ----------------------------------------------------------------------------
>
>                 Key: TAJO-1144
>                 URL: https://issues.apache.org/jira/browse/TAJO-1144
>             Project: Tajo
>          Issue Type: Improvement
>          Components: physical operator
>            Reporter: Hyunsik Choi
>            Assignee: Jaehwa Jung
>            Priority: Critical
>             Fix For: 0.10
>
>
> Tajo can write multiple output files according to a given maximum file size. 
> We call this feature 'output file rotation'.
> BTW, INSERT with output file rotation may overwrite the previously written 
> output file. See the source code in StoreTableExec.
> {noformat}
> public void openNewFile(int suffixId) throws IOException {
>     String prevFile = null;
>     lastFileName = context.getOutputPath();
>     if (suffixId > 0) {
>       prevFile = lastFileName.toString();
>       lastFileName = new Path(lastFileName + "_" + suffixId);
>     }
>     if (plan instanceof InsertNode) {
>       InsertNode createTableNode = (InsertNode) plan;
>       appender = 
> StorageManager.getStorageManager(context.getConf()).getAppender(meta,
>           createTableNode.getTableSchema(), context.getOutputPath());
>     } else {
>       appender = 
> StorageManager.getStorageManager(context.getConf()).getAppender(meta, 
> outSchema, lastFileName);
>     }
>     appender.enableStats();
>     appender.init();
>     if (suffixId > 0) {
>       LOG.info(prevFile + " exceeds " + 
> SessionVars.MAX_OUTPUT_FILE_SIZE.keyname() + " (" + maxPerFileSize + " MB), " 
> +
>           "The remain output will be written into " + 
> lastFileName.toString());
>     }
>   }
> {noformat}
> In particular, {{context.getOutputPath\(\)}} should be lastFileName in the 
> part below.
> {noformat}
>     if (plan instanceof InsertNode) {
>       InsertNode createTableNode = (InsertNode) plan;
>       appender = 
> StorageManager.getStorageManager(context.getConf()).getAppender(meta,
>           createTableNode.getTableSchema(), context.getOutputPath());
>     } else {
>       appender = 
> StorageManager.getStorageManager(context.getConf()).getAppender(meta, 
> outSchema, lastFileName);
>     }
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to