http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinOnPartitionedTables.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinOnPartitionedTables.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinOnPartitionedTables.java index f605ccf..fdaac1d 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinOnPartitionedTables.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinOnPartitionedTables.java @@ -18,7 +18,6 @@ package org.apache.tajo.engine.query; -import com.google.protobuf.ServiceException; import org.apache.tajo.IntegrationTest; import org.apache.tajo.NamedTest; import org.apache.tajo.TajoTestingCluster; @@ -225,8 +224,8 @@ public class TestJoinOnPartitionedTables extends TestJoinQuery { String tableName = CatalogUtil.normalizeIdentifier("paritioned_nation"); ResultSet res = executeString( "create table " + tableName + " (n_name text," - + " n_comment text, n_regionkey int8) USING csv " - + "WITH ('csvfile.delimiter'='|')" + + " n_comment text, n_regionkey int8) USING text " + + "WITH ('text.delimiter'='|')" + "PARTITION BY column(n_nationkey int8)"); res.close(); assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName)); @@ -264,7 +263,7 @@ public class TestJoinOnPartitionedTables extends TestJoinQuery { See the following case. 
CREATE TABLE orders_partition (o_orderkey INT8, o_custkey INT8, o_totalprice FLOAT8, o_orderpriority TEXT, - o_clerk TEXT, o_shippriority INT4, o_comment TEXT) USING CSV WITH ('csvfile.delimiter'='|') + o_clerk TEXT, o_shippriority INT4, o_comment TEXT) USING TEXT WITH ('text.delimiter'='|') PARTITION BY COLUMN(o_orderdate TEXT, o_orderstatus TEXT); select a.o_orderstatus, count(*) as cnt @@ -283,7 +282,7 @@ public class TestJoinOnPartitionedTables extends TestJoinQuery { String tableName = CatalogUtil.normalizeIdentifier("partitioned_orders"); ResultSet res = executeString( "create table " + tableName + " (o_orderkey INT8, o_custkey INT8, o_totalprice FLOAT8, o_orderpriority TEXT,\n" + - "o_clerk TEXT, o_shippriority INT4, o_comment TEXT) USING CSV WITH ('csvfile.delimiter'='|')\n" + + "o_clerk TEXT, o_shippriority INT4, o_comment TEXT) USING TEXT WITH ('text.delimiter'='|')\n" + "PARTITION BY COLUMN(o_orderdate TEXT, o_orderstatus TEXT, o_orderkey_mod INT8)"); res.close(); assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java index 77bfca6..265c726 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java @@ -592,8 +592,8 @@ public class TestTablePartitions extends QueryTestCaseBase { if (nodeType == NodeType.INSERT) { res = executeString( - "create table " + tableName + " (col2 int4, col3 float8) USING csv " + - "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + + "create table " + tableName + " (col2 int4, col3 float8) USING text " + + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + "PARTITION BY column(col1 int4)"); res.close(); assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName)); @@ -602,8 +602,8 @@ public class TestTablePartitions extends QueryTestCaseBase { "insert overwrite into " + tableName + " select l_partkey, l_quantity, l_orderkey from lineitem"); } else { res = executeString( - "create table " + tableName + " (col2 int4, col3 float8) USING csv " + - "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + + "create table " + tableName + " (col2 int4, col3 float8) USING text " + + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + "PARTITION BY column(col1 int4) as select l_partkey, l_quantity, l_orderkey from lineitem"); } res.close(); @@ -639,8 +639,8 @@ public class TestTablePartitions extends QueryTestCaseBase { String tableName = 
CatalogUtil.normalizeIdentifier("testColumnPartitionedTableByTwoColumnsWithCompression"); if (nodeType == NodeType.INSERT) { - res = executeString("create table " + tableName + " (col3 float8, col4 text) USING csv " + - "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + + res = executeString("create table " + tableName + " (col3 float8, col4 text) USING text " + + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + "PARTITION by column(col1 int4, col2 int4)"); res.close(); @@ -650,8 +650,8 @@ public class TestTablePartitions extends QueryTestCaseBase { "insert overwrite into " + tableName + " select l_quantity, l_returnflag, l_orderkey, l_partkey from lineitem"); } else { - res = executeString("create table " + tableName + " (col3 float8, col4 text) USING csv " + - "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + + res = executeString("create table " + tableName + " (col3 float8, col4 text) USING text " + + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + "PARTITION by column(col1 int4, col2 int4) as select l_quantity, l_returnflag, l_orderkey, " + "l_partkey from lineitem"); } @@ -696,8 +696,8 @@ public class TestTablePartitions extends QueryTestCaseBase { if (nodeType == NodeType.INSERT) { res = executeString( - "create table " + tableName + " (col4 text) USING csv " + - "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + + "create table " + tableName + " (col4 text) USING text " + + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + "partition by column(col1 int4, col2 int4, col3 float8)"); res.close(); @@ -707,8 +707,8 @@ public class TestTablePartitions extends QueryTestCaseBase { "insert overwrite into " + tableName + " select l_returnflag, l_orderkey, l_partkey, 
l_quantity from lineitem"); } else { - res = executeString("create table " + tableName + " (col4 text) USING csv " + - "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + + res = executeString("create table " + tableName + " (col4 text) USING text " + + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + "partition by column(col1 int4, col2 int4, col3 float8) as select l_returnflag, l_orderkey, l_partkey, " + "l_quantity from lineitem"); } @@ -791,8 +791,8 @@ public class TestTablePartitions extends QueryTestCaseBase { if (nodeType == NodeType.INSERT) { res = executeString( - "create table " + tableName + " (col4 text) USING csv " + - "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + + "create table " + tableName + " (col4 text) USING text " + + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + "partition by column(col1 int4, col2 int4, col3 float8)"); res.close(); @@ -802,8 +802,8 @@ public class TestTablePartitions extends QueryTestCaseBase { "insert overwrite into " + tableName + " select l_returnflag , l_orderkey, l_partkey, l_quantity from lineitem"); } else { - res = executeString("create table " + tableName + " (col4 text) USING csv " + - "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + + res = executeString("create table " + tableName + " (col4 text) USING text " + + "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " + "partition by column(col1 int4, col2 int4, col3 float8) as select l_returnflag , l_orderkey, l_partkey, " + "l_quantity from lineitem"); } http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/java/org/apache/tajo/jdbc/TestResultSet.java ---------------------------------------------------------------------- diff --git 
a/tajo-core/src/test/java/org/apache/tajo/jdbc/TestResultSet.java b/tajo-core/src/test/java/org/apache/tajo/jdbc/TestResultSet.java index 3d32c08..0c83fd0 100644 --- a/tajo-core/src/test/java/org/apache/tajo/jdbc/TestResultSet.java +++ b/tajo-core/src/test/java/org/apache/tajo/jdbc/TestResultSet.java @@ -71,7 +71,7 @@ public class TestResultSet { scoreSchema = new Schema(); scoreSchema.addColumn("deptname", Type.TEXT); scoreSchema.addColumn("score", Type.INT4); - scoreMeta = CatalogUtil.newTableMeta("CSV"); + scoreMeta = CatalogUtil.newTableMeta("TEXT"); TableStats stats = new TableStats(); Path p = new Path(sm.getTableUri("default", "score")); http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/java/org/apache/tajo/master/TestExecutionBlockCursor.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/master/TestExecutionBlockCursor.java b/tajo-core/src/test/java/org/apache/tajo/master/TestExecutionBlockCursor.java index 7c61cc7..169f375 100644 --- a/tajo-core/src/test/java/org/apache/tajo/master/TestExecutionBlockCursor.java +++ b/tajo-core/src/test/java/org/apache/tajo/master/TestExecutionBlockCursor.java @@ -69,7 +69,7 @@ public class TestExecutionBlockCursor { tpch.loadSchemas(); tpch.loadOutSchema(); for (String table : tpch.getTableNames()) { - TableMeta m = CatalogUtil.newTableMeta("CSV"); + TableMeta m = CatalogUtil.newTableMeta("TEXT"); TableDesc d = CatalogUtil.newTableDesc( CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, table), tpch.getSchema(table), m, CommonTestingUtil.getTestDir()); TableStats stats = new TableStats(); http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestAlterTable/table1_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestAlterTable/table1_ddl.sql 
b/tajo-core/src/test/resources/queries/TestAlterTable/table1_ddl.sql index 8d7fba0..f00f593 100644 --- a/tajo-core/src/test/resources/queries/TestAlterTable/table1_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestAlterTable/table1_ddl.sql @@ -1 +1 @@ -CREATE EXTERNAL TABLE ${0} (id int, str text, num int) using csv location ${table.path}; +CREATE EXTERNAL TABLE ${0} (id int, str text, num int) using text location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestAlterTable/table2_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestAlterTable/table2_ddl.sql b/tajo-core/src/test/resources/queries/TestAlterTable/table2_ddl.sql index ae563e7..887029d 100644 --- a/tajo-core/src/test/resources/queries/TestAlterTable/table2_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestAlterTable/table2_ddl.sql @@ -1 +1 @@ -CREATE EXTERNAL TABLE ${0} (xx text, yy text, zz text) USING CSV WITH('text.delimiter'='+') LOCATION ${table.path}; +CREATE EXTERNAL TABLE ${0} (xx text, yy text, zz text) USING TEXT WITH('text.delimiter'='+') LOCATION ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestCTASQuery/CtasWithOptions.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestCTASQuery/CtasWithOptions.sql b/tajo-core/src/test/resources/queries/TestCTASQuery/CtasWithOptions.sql index a393b9d..b7dde15 100644 --- a/tajo-core/src/test/resources/queries/TestCTASQuery/CtasWithOptions.sql +++ b/tajo-core/src/test/resources/queries/TestCTASQuery/CtasWithOptions.sql @@ -1,5 +1,5 @@ create table testCtasWithOptions (col1 float, col2 float) -using csv with ('csvfile.delimiter'='\u0001') as +using text with ('text.delimiter'='\u0001') as select sum(l_orderkey) as total1, avg(l_partkey) as total2 
http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_1.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_1.sql b/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_1.sql index f93e1e6..d8293d0 100644 --- a/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_1.sql +++ b/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_1.sql @@ -1 +1 @@ -CREATE EXTERNAL TABLE ${0} ("aGe" int, "teXt" text, "Number" int) using csv location ${table.path}; \ No newline at end of file +CREATE EXTERNAL TABLE ${0} ("aGe" int, "teXt" text, "Number" int) using text location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_2.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_2.sql b/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_2.sql index f93e1e6..d8293d0 100644 --- a/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_2.sql +++ b/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_mixed_chars_ddl_2.sql @@ -1 +1 @@ -CREATE EXTERNAL TABLE ${0} ("aGe" int, "teXt" text, "Number" int) using csv location ${table.path}; \ No newline at end of file +CREATE EXTERNAL TABLE ${0} ("aGe" int, "teXt" text, "Number" int) using text location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_non_ascii_ddl.sql 
---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_non_ascii_ddl.sql b/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_non_ascii_ddl.sql index c1f073d..1706d80 100644 --- a/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_non_ascii_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestCreateTable/quoted_identifier_non_ascii_ddl.sql @@ -1 +1 @@ -CREATE EXTERNAL TABLE ${0} ("아이디" int, "테스트" text, "숫자" int) using csv location ${table.path}; \ No newline at end of file +CREATE EXTERNAL TABLE ${0} ("아이디" int, "테스트" text, "숫자" int) using text location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestCreateTable/table1_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestCreateTable/table1_ddl.sql b/tajo-core/src/test/resources/queries/TestCreateTable/table1_ddl.sql index 11ced2a..e26975a 100644 --- a/tajo-core/src/test/resources/queries/TestCreateTable/table1_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestCreateTable/table1_ddl.sql @@ -1 +1 @@ -CREATE EXTERNAL TABLE ${0} (id int, str text, num int) using csv location ${table.path}; \ No newline at end of file +CREATE EXTERNAL TABLE ${0} (id int, str text, num int) using text location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteLocationWithCompression.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteLocationWithCompression.sql b/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteLocationWithCompression.sql index c3c67c7..3582403
100644 --- a/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteLocationWithCompression.sql +++ b/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteLocationWithCompression.sql @@ -1 +1 @@ -insert overwrite into location '/tajo-data/testInsertOverwriteLocationWithCompression' USING csv WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') select * from default.lineitem where l_orderkey = 3; \ No newline at end of file +insert overwrite into location '/tajo-data/testInsertOverwriteLocationWithCompression' USING text WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') select * from default.lineitem where l_orderkey = 3; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteWithCompression_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteWithCompression_ddl.sql b/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteWithCompression_ddl.sql index dd49f84..8e76b9e 100644 --- a/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteWithCompression_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestInsertQuery/testInsertOverwriteWithCompression_ddl.sql @@ -1,4 +1,4 @@ create table testInsertOverwriteWithCompression (col1 int4, col2 int4, col3 float8) -USING csv -WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec'); \ No newline at end of file +USING text +WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec'); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestJoinQuery/create_customer_large_ddl.sql 
---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/create_customer_large_ddl.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/create_customer_large_ddl.sql index e5be236..a9122f3 100644 --- a/tajo-core/src/test/resources/queries/TestJoinQuery/create_customer_large_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/create_customer_large_ddl.sql @@ -4,4 +4,4 @@ create external table if not exists customer_large ( c_custkey INT4, c_name TEXT, c_address TEXT, c_nationkey INT4, c_phone TEXT, c_acctbal FLOAT8, c_mktsegment TEXT, c_comment TEXT) -using csv with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; \ No newline at end of file +using text with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestJoinQuery/create_lineitem_large_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/create_lineitem_large_ddl.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/create_lineitem_large_ddl.sql index 366f22b..f8d574f 100644 --- a/tajo-core/src/test/resources/queries/TestJoinQuery/create_lineitem_large_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/create_lineitem_large_ddl.sql @@ -4,4 +4,4 @@ create external table if not exists lineitem_large ( l_orderkey INT4, l_partkey INT4, l_suppkey INT4, l_linenumber INT4, l_quantity FLOAT8, l_extendedprice FLOAT8, l_discount FLOAT8, l_tax FLOAT8, l_returnflag TEXT, l_linestatus TEXT, l_shipdate TEXT, l_commitdate TEXT, l_receiptdate TEXT, l_shipinstruct TEXT, l_shipmode TEXT, l_comment TEXT) -using csv with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; \ No newline at end of file +using text with ('text.delimiter'='|', 
'text.null'='NULL') location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestJoinQuery/create_orders_large_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/create_orders_large_ddl.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/create_orders_large_ddl.sql index 4b9bb65..8f99e5e 100644 --- a/tajo-core/src/test/resources/queries/TestJoinQuery/create_orders_large_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/create_orders_large_ddl.sql @@ -4,4 +4,4 @@ create external table if not exists orders_large ( o_orderkey INT4, o_custkey INT4, o_orderstatus TEXT, o_totalprice FLOAT8, o_orderdate TEXT, o_orderpriority TEXT, o_clerk TEXT, o_shippriority INT4, o_comment TEXT) -using csv with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; \ No newline at end of file +using text with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql index faf66f2..454581a 100644 --- a/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table1_ddl.sql @@ -1,3 +1,3 @@ -create external table testOuterJoinAndCaseWhen1 (id int, name text, score float, type text) using csv -with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; +create external table testOuterJoinAndCaseWhen1 (id int, name text, score float, type text) using text +with ('text.delimiter'='|', 'text.null'='NULL') 
location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql index 5680663..ac1e1f6 100644 --- a/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/oj_table2_ddl.sql @@ -1,3 +1,3 @@ -create external table testOuterJoinAndCaseWhen2 (id int, name text, score float, type text) using csv -with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; +create external table testOuterJoinAndCaseWhen2 (id int, name text, score float, type text) using text +with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int4_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int4_ddl.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int4_ddl.sql index 0d35cee..c991f71 100644 --- a/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int4_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int4_ddl.sql @@ -1,3 +1,3 @@ -create external table ${0} (id int, name text, score float, type text) using csv -with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; +create external table ${0} (id int, name text, score float, type text) using text +with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int8_ddl.sql 
---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int8_ddl.sql b/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int8_ddl.sql index 3a7a44a..8f0b8de 100644 --- a/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int8_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestJoinQuery/table1_int8_ddl.sql @@ -1,3 +1,3 @@ -create external table ${0} (id bigint, name text, score float, type text) using csv -with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; +create external table ${0} (id bigint, name text, score float, type text) using text +with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestNetTypes/table1_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestNetTypes/table1_ddl.sql b/tajo-core/src/test/resources/queries/TestNetTypes/table1_ddl.sql index c3ee88e..b4cc87e 100644 --- a/tajo-core/src/test/resources/queries/TestNetTypes/table1_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestNetTypes/table1_ddl.sql @@ -1,4 +1,4 @@ -- It is used in TestNetTypes -create external table IF NOT EXISTS table1 (id int, name text, score float, type text, addr inet4) using csv -with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; \ No newline at end of file +create external table IF NOT EXISTS table1 (id int, name text, score float, type text, addr inet4) using text +with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestNetTypes/table2_ddl.sql ---------------------------------------------------------------------- diff --git 
a/tajo-core/src/test/resources/queries/TestNetTypes/table2_ddl.sql b/tajo-core/src/test/resources/queries/TestNetTypes/table2_ddl.sql index 30bfafe..40f0464 100644 --- a/tajo-core/src/test/resources/queries/TestNetTypes/table2_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestNetTypes/table2_ddl.sql @@ -1,4 +1,4 @@ -- It is used in TestNetTypes -create external table IF NOT EXISTS table2 (id int, name text, score float, type text, addr inet4) using csv -with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; \ No newline at end of file +create external table IF NOT EXISTS table2 (id int, name text, score float, type text, addr inet4) using text +with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table1_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table1_ddl.sql b/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table1_ddl.sql index 2b4a2ce..8309d11 100644 --- a/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table1_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table1_ddl.sql @@ -1,3 +1,3 @@ -create external table table1 (id int, name text, score float, type text) using csv -with ('csvfile.delimiter'='||', 'csvfile.null'='NULL') location ${table.path}; +create external table table1 (id int, name text, score float, type text) using text +with ('text.delimiter'='||', 'text.null'='NULL') location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table2_ddl.sql ---------------------------------------------------------------------- diff --git 
a/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table2_ddl.sql b/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table2_ddl.sql index d918ac6..2fb821a 100644 --- a/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table2_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestSelectQuery/multibytes_delimiter_table2_ddl.sql @@ -1,3 +1,3 @@ -create external table table2 (id int, name text, score float, type text) using csv -with ('csvfile.delimiter'='ã ', 'csvfile.null'='NULL') location ${table.path}; +create external table table2 (id int, name text, score float, type text) using text +with ('text.delimiter'='ã ', 'text.null'='NULL') location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_asc_desc_keys.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_asc_desc_keys.sql b/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_asc_desc_keys.sql index 3ffa2b3..936e5ed 100644 --- a/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_asc_desc_keys.sql +++ b/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_asc_desc_keys.sql @@ -1 +1 @@ -create external table table2 (col1 int8, col2 int8) using csv with ('csvfile.delimiter'=',') location ${table.path}; \ No newline at end of file +create external table table2 (col1 int8, col2 int8) using text with ('text.delimiter'=',') location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_date_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_date_ddl.sql 
b/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_date_ddl.sql index 322b35c..3ba63e0 100644 --- a/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_date_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestSortQuery/create_table_with_date_ddl.sql @@ -5,6 +5,6 @@ create external table testSortWithDate ( col1 timestamp, col2 date, col3 time -) using csv -with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') +) using text +with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql b/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql index 77e76d5..ac49b23 100644 --- a/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql @@ -1,3 +1,3 @@ create external table if not exists lineitemspecial ( l_orderkey INT4, l_shipinstruct TEXT, l_shipmode TEXT) -using csv with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path}; +using text with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestTajoJdbc/create_table_with_date_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestTajoJdbc/create_table_with_date_ddl.sql b/tajo-core/src/test/resources/queries/TestTajoJdbc/create_table_with_date_ddl.sql index 846cbb6..caeeaf9 100644 --- a/tajo-core/src/test/resources/queries/TestTajoJdbc/create_table_with_date_ddl.sql +++ 
b/tajo-core/src/test/resources/queries/TestTajoJdbc/create_table_with_date_ddl.sql @@ -5,6 +5,6 @@ create external table table1 ( col1 timestamp, col2 date, col3 time -) using csv -with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') +) using text +with ('text.delimiter'='|', 'text.null'='NULL') location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/TestTruncateTable/table2_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestTruncateTable/table2_ddl.sql b/tajo-core/src/test/resources/queries/TestTruncateTable/table2_ddl.sql index 8d7fba0..f00f593 100644 --- a/tajo-core/src/test/resources/queries/TestTruncateTable/table2_ddl.sql +++ b/tajo-core/src/test/resources/queries/TestTruncateTable/table2_ddl.sql @@ -1 +1 @@ -CREATE EXTERNAL TABLE ${0} (id int, str text, num int) using csv location ${table.path}; +CREATE EXTERNAL TABLE ${0} (id int, str text, num int) using text location ${table.path}; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/queries/default/create_table_12.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/default/create_table_12.sql b/tajo-core/src/test/resources/queries/default/create_table_12.sql index 0816084..99e7b8d 100644 --- a/tajo-core/src/test/resources/queries/default/create_table_12.sql +++ b/tajo-core/src/test/resources/queries/default/create_table_12.sql @@ -1,3 +1,3 @@ create external table table1 (name text, age int) -USING csv WITH ('csvfile.delimiter'='|') +USING text WITH ('text.delimiter'='|') location '/user/hive/table1' \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/results/TestTajoCli/testDescTable.result ---------------------------------------------------------------------- diff 
--git a/tajo-core/src/test/resources/results/TestTajoCli/testDescTable.result b/tajo-core/src/test/resources/results/TestTajoCli/testDescTable.result index d3800ab..f065e6e 100644 --- a/tajo-core/src/test/resources/results/TestTajoCli/testDescTable.result +++ b/tajo-core/src/test/resources/results/TestTajoCli/testDescTable.result @@ -2,7 +2,7 @@ OK table name: default.TEST_DESC_TABLE table uri: ${table.path} -store type: CSV +store type: TEXT number of rows: 0 volume: 0 B Options: @@ -16,7 +16,7 @@ col2 INT4 table name: default.TEST_DESC_TABLE table uri: ${table.path} -store type: CSV +store type: TEXT number of rows: 0 volume: 0 B Options: http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/results/TestTajoCli/testDescTableForNestedSchema.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestTajoCli/testDescTableForNestedSchema.result b/tajo-core/src/test/resources/results/TestTajoCli/testDescTableForNestedSchema.result index 7eff4af..29106b3 100644 --- a/tajo-core/src/test/resources/results/TestTajoCli/testDescTableForNestedSchema.result +++ b/tajo-core/src/test/resources/results/TestTajoCli/testDescTableForNestedSchema.result @@ -2,7 +2,7 @@ OK table name: default.TEST_DESC_TABLE_NESTED table uri: ${table.path} -store type: CSV +store type: TEXT number of rows: 0 volume: 0 B Options: @@ -17,7 +17,7 @@ col3 RECORD (col4 RECORD (col5 TEXT)) table name: default.TEST_DESC_TABLE_NESTED table uri: ${table.path} -store type: CSV +store type: TEXT number of rows: 0 volume: 0 B Options: http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/results/TestTajoDump/testDump1.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestTajoDump/testDump1.result b/tajo-core/src/test/resources/results/TestTajoDump/testDump1.result index 326ba3f..8accece 100644 --- 
a/tajo-core/src/test/resources/results/TestTajoDump/testDump1.result +++ b/tajo-core/src/test/resources/results/TestTajoDump/testDump1.result @@ -10,7 +10,7 @@ CREATE DATABASE IF NOT EXISTS "TestTajoDump"; -- --- Name: "TestTajoDump"."TableName1"; Type: TABLE; Storage: CSV +-- Name: "TestTajoDump"."TableName1"; Type: TABLE; Storage: TEXT -- -CREATE TABLE "TestTajoDump"."TableName1" ("Age" INT4, "FirstName" TEXT, lastname TEXT) USING CSV WITH ('text.delimiter'='|'); +CREATE TABLE "TestTajoDump"."TableName1" ("Age" INT4, "FirstName" TEXT, lastname TEXT) USING TEXT WITH ('text.delimiter'='|'); http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/results/TestTajoDump/testDump2.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestTajoDump/testDump2.result b/tajo-core/src/test/resources/results/TestTajoDump/testDump2.result index 6c15e3e..787562e 100644 --- a/tajo-core/src/test/resources/results/TestTajoDump/testDump2.result +++ b/tajo-core/src/test/resources/results/TestTajoDump/testDump2.result @@ -10,7 +10,7 @@ CREATE DATABASE IF NOT EXISTS "TestTajoDump"; -- --- Name: "TestTajoDump"."TableName2"; Type: TABLE; Storage: CSV +-- Name: "TestTajoDump"."TableName2"; Type: TABLE; Storage: TEXT -- -CREATE TABLE "TestTajoDump"."TableName2" ("Age" INT4, "Name" RECORD ("FirstName" TEXT, lastname TEXT)) USING CSV WITH ('text.delimiter'='|'); +CREATE TABLE "TestTajoDump"."TableName2" ("Age" INT4, "Name" RECORD ("FirstName" TEXT, lastname TEXT)) USING TEXT WITH ('text.delimiter'='|'); http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForBaseTable.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForBaseTable.result 
b/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForBaseTable.result index b1172a7..7e10a3b 100644 --- a/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForBaseTable.result +++ b/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForBaseTable.result @@ -1,4 +1,4 @@ -- --- Name: db1.table2; Type: TABLE; Storage: CSV +-- Name: db1.table2; Type: TABLE; Storage: TEXT -- -CREATE TABLE db1.table2 (name BLOB, addr TEXT) USING CSV WITH ('compression.codec'='org.apache.hadoop.io.compress.GzipCodec', 'text.delimiter'='|'); \ No newline at end of file +CREATE TABLE db1.table2 (name BLOB, addr TEXT) USING TEXT WITH ('compression.codec'='org.apache.hadoop.io.compress.GzipCodec', 'text.delimiter'='|'); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForExternalTable.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForExternalTable.result b/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForExternalTable.result index a5b5f63..535bc11 100644 --- a/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForExternalTable.result +++ b/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLForExternalTable.result @@ -1,5 +1,5 @@ -- --- Name: db1.table1; Type: TABLE; Storage: CSV +-- Name: db1.table1; Type: TABLE; Storage: TEXT -- Path: /table1 -- -CREATE EXTERNAL TABLE db1.table1 (name BLOB, addr TEXT) USING CSV WITH ('compression.codec'='org.apache.hadoop.io.compress.GzipCodec', 'text.delimiter'='|') PARTITION BY COLUMN(key INT4, key2 TEXT) LOCATION '/table1'; \ No newline at end of file +CREATE EXTERNAL TABLE db1.table1 (name BLOB, addr TEXT) USING TEXT WITH ('compression.codec'='org.apache.hadoop.io.compress.GzipCodec', 'text.delimiter'='|') PARTITION BY COLUMN(key INT4, key2 TEXT) LOCATION 
'/table1'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName1.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName1.result b/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName1.result index 43be915..34b6fb3 100644 --- a/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName1.result +++ b/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName1.result @@ -1,5 +1,5 @@ -- --- Name: db1."TABLE2"; Type: TABLE; Storage: CSV +-- Name: db1."TABLE2"; Type: TABLE; Storage: TEXT -- Path: /table1 -- -CREATE EXTERNAL TABLE db1."TABLE2" (name BLOB, addr TEXT, "FirstName" TEXT, "LastName" TEXT, "with" TEXT) USING CSV WITH ('compression.codec'='org.apache.hadoop.io.compress.GzipCodec', 'text.delimiter'='|') PARTITION BY COLUMN("BirthYear" INT4) LOCATION '/table1'; \ No newline at end of file +CREATE EXTERNAL TABLE db1."TABLE2" (name BLOB, addr TEXT, "FirstName" TEXT, "LastName" TEXT, "with" TEXT) USING TEXT WITH ('compression.codec'='org.apache.hadoop.io.compress.GzipCodec', 'text.delimiter'='|') PARTITION BY COLUMN("BirthYear" INT4) LOCATION '/table1'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName2.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName2.result b/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName2.result index 764892f..cd1ebf8 100644 --- a/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName2.result +++ 
b/tajo-core/src/test/resources/results/testDDLBuilder/testBuildDDLQuotedTableName2.result @@ -1,4 +1,4 @@ -- --- Name: db1."TABLE1"; Type: TABLE; Storage: CSV +-- Name: db1."TABLE1"; Type: TABLE; Storage: TEXT -- -CREATE TABLE db1."TABLE1" (name BLOB, addr TEXT, "FirstName" TEXT, "LastName" TEXT, "with" TEXT) USING CSV WITH ('compression.codec'='org.apache.hadoop.io.compress.GzipCodec', 'text.delimiter'='|') PARTITION BY COLUMN("BirthYear" INT4); \ No newline at end of file +CREATE TABLE db1."TABLE1" (name BLOB, addr TEXT, "FirstName" TEXT, "LastName" TEXT, "with" TEXT) USING TEXT WITH ('compression.codec'='org.apache.hadoop.io.compress.GzipCodec', 'text.delimiter'='|') PARTITION BY COLUMN("BirthYear" INT4); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-docs/src/main/sphinx/backup_and_restore/catalog.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/backup_and_restore/catalog.rst b/tajo-docs/src/main/sphinx/backup_and_restore/catalog.rst index 1c2b709..5676b17 100644 --- a/tajo-docs/src/main/sphinx/backup_and_restore/catalog.rst +++ b/tajo-docs/src/main/sphinx/backup_and_restore/catalog.rst @@ -25,7 +25,7 @@ For example, if you want to backup a table customer, you should type a command a -- -- - -- Name: customer; Type: TABLE; Storage: CSV + -- Name: customer; Type: TABLE; Storage: TEXT -- Path: file:/home/hyunsik/tpch/customer -- CREATE EXTERNAL TABLE customer (c_custkey INT8, c_name TEXT, c_address TEXT, c_nationkey INT8, c_phone TEXT, c_acctbal FLOAT8, c_mktsegment TEXT, c_comment TEXT) USING TEXT LOCATION 'file:/home/hyunsik/tpch/customer'; http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-docs/src/main/sphinx/getting_started.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/getting_started.rst b/tajo-docs/src/main/sphinx/getting_started.rst index e30c3fe..ecb013e 100644 --- 
a/tajo-docs/src/main/sphinx/getting_started.rst +++ b/tajo-docs/src/main/sphinx/getting_started.rst @@ -152,7 +152,7 @@ If you want to know DDL statements in more detail, please see Query Language. :: table name: table1 table path: file:/home/x/table1 - store type: CSV + store type: TEXT number of rows: 0 volume (bytes): 78 B schema: http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-docs/src/main/sphinx/index/how_to_use.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/index/how_to_use.rst b/tajo-docs/src/main/sphinx/index/how_to_use.rst index 776d205..09e1b72 100644 --- a/tajo-docs/src/main/sphinx/index/how_to_use.rst +++ b/tajo-docs/src/main/sphinx/index/how_to_use.rst @@ -18,7 +18,7 @@ If the index is created successfully, you can see the information about that ind table name: default.lineitem table path: hdfs://localhost:7020/tpch/lineitem - store type: CSV + store type: TEXT number of rows: unknown volume: 753.9 MB Options: http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-docs/src/main/sphinx/tsql/meta_command.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/tsql/meta_command.rst b/tajo-docs/src/main/sphinx/tsql/meta_command.rst index 057124d..7d687a1 100644 --- a/tajo-docs/src/main/sphinx/tsql/meta_command.rst +++ b/tajo-docs/src/main/sphinx/tsql/meta_command.rst @@ -80,7 +80,7 @@ Basic usages table name: orders table path: hdfs:/xxx/xxx/tpch/orders - store type: CSV + store type: TEXT number of rows: 0 volume (bytes): 172.0 MB schema: http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java index 4a208c2..47ab9b1 100644 --- 
a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java @@ -24,11 +24,11 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.tajo.BuiltinStorages; import org.apache.tajo.OverridableConf; import org.apache.tajo.QueryVars; import org.apache.tajo.SessionVars; @@ -54,9 +54,6 @@ import org.apache.tajo.plan.nameresolver.NameResolvingMode; import org.apache.tajo.plan.rewrite.rules.ProjectionPushDownRule; import org.apache.tajo.plan.util.ExprFinder; import org.apache.tajo.plan.util.PlannerUtil; -import org.apache.tajo.catalog.SchemaUtil; -import org.apache.tajo.plan.verifier.SyntaxErrorUtil; -import org.apache.tajo.plan.verifier.VerifyException; import org.apache.tajo.storage.StorageService; import org.apache.tajo.util.KeyValueSet; import org.apache.tajo.util.Pair; @@ -1786,7 +1783,7 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex insertNode.setUri(targetUri); if (expr.hasStorageType()) { - insertNode.setStorageType(expr.getStorageType()); + insertNode.setStorageType(CatalogUtil.getBackwardCompitablityStoreType(expr.getStorageType())); } if (expr.hasParams()) { KeyValueSet options = new KeyValueSet(); @@ -1833,7 +1830,7 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex createTableNode.setTableSchema(baseTable.getSchema()); createTableNode.setPartitionMethod(partitionDesc); - createTableNode.setStorageType(baseTable.getMeta().getStoreType()); + createTableNode.setStorageType(CatalogUtil.getBackwardCompitablityStoreType(baseTable.getMeta().getStoreType())); 
createTableNode.setOptions(baseTable.getMeta().getOptions()); createTableNode.setExternal(baseTable.isExternal()); @@ -1869,9 +1866,9 @@ createTableNode.setUri(getCreatedTableURI(context, expr)); if (expr.hasStorageType()) { // If storage type (using clause) is specified - createTableNode.setStorageType(expr.getStorageType()); + createTableNode.setStorageType(CatalogUtil.getBackwardCompitablityStoreType(expr.getStorageType())); } else { // otherwise, default type - createTableNode.setStorageType("CSV"); + createTableNode.setStorageType(BuiltinStorages.TEXT); } // Set default storage properties to table http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-plan/src/main/java/org/apache/tajo/plan/TablePropertyUtil.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/TablePropertyUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/TablePropertyUtil.java index 2fe0cbe..62fa706 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/TablePropertyUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/TablePropertyUtil.java @@ -41,7 +41,7 @@ public class TablePropertyUtil { String storeType = node.getStorageType(); KeyValueSet property = node.getOptions(); - if (storeType.equalsIgnoreCase("CSV") || storeType.equalsIgnoreCase("TEXT")) { + if (storeType.equalsIgnoreCase("TEXT")) { setSessionToProperty(context, SessionVars.NULL_CHAR, property, StorageConstants.TEXT_NULL); } } http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PersistentStoreNode.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PersistentStoreNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PersistentStoreNode.java index 
19a1820..c892eda 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PersistentStoreNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PersistentStoreNode.java @@ -31,7 +31,7 @@ import org.apache.tajo.util.TUtil; * This includes some basic information for materializing data. */ public abstract class PersistentStoreNode extends UnaryNode implements Cloneable { - @Expose protected String storageType = "CSV"; + @Expose protected String storageType = "TEXT"; @Expose protected KeyValueSet options = new KeyValueSet(); protected PersistentStoreNode(int pid, NodeType nodeType) { http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-plan/src/test/java/org/apache/tajo/plan/TestLogicalNode.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/test/java/org/apache/tajo/plan/TestLogicalNode.java b/tajo-plan/src/test/java/org/apache/tajo/plan/TestLogicalNode.java index 694eeeb..92cb399 100644 --- a/tajo-plan/src/test/java/org/apache/tajo/plan/TestLogicalNode.java +++ b/tajo-plan/src/test/java/org/apache/tajo/plan/TestLogicalNode.java @@ -44,13 +44,13 @@ public class TestLogicalNode { GroupbyNode groupbyNode = new GroupbyNode(0); groupbyNode.setGroupingColumns(new Column[]{schema.getColumn(1), schema.getColumn(2)}); ScanNode scanNode = new ScanNode(0); - scanNode.init(CatalogUtil.newTableDesc("in", schema, CatalogUtil.newTableMeta("CSV"), new Path("in"))); + scanNode.init(CatalogUtil.newTableDesc("in", schema, CatalogUtil.newTableMeta("TEXT"), new Path("in"))); GroupbyNode groupbyNode2 = new GroupbyNode(0); groupbyNode2.setGroupingColumns(new Column[]{schema.getColumn(1), schema.getColumn(2)}); JoinNode joinNode = new JoinNode(0); ScanNode scanNode2 = new ScanNode(0); - scanNode2.init(CatalogUtil.newTableDesc("in2", schema, CatalogUtil.newTableMeta("CSV"), new Path("in2"))); + scanNode2.init(CatalogUtil.newTableDesc("in2", schema, CatalogUtil.newTableMeta("TEXT"), new Path("in2"))); 
groupbyNode.setChild(scanNode); groupbyNode2.setChild(joinNode); @@ -61,7 +61,7 @@ public class TestLogicalNode { assertFalse(groupbyNode.deepEquals(groupbyNode2)); ScanNode scanNode3 = new ScanNode(0); - scanNode3.init(CatalogUtil.newTableDesc("in", schema, CatalogUtil.newTableMeta("CSV"), new Path("in"))); + scanNode3.init(CatalogUtil.newTableDesc("in", schema, CatalogUtil.newTableMeta("TEXT"), new Path("in"))); groupbyNode2.setChild(scanNode3); assertTrue(groupbyNode.equals(groupbyNode2)); http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml b/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml index 93611fb..09261a9 100644 --- a/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml +++ b/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml @@ -39,7 +39,7 @@ <!--- Registered Scanner Handler --> <property> <name>tajo.storage.scanner-handler</name> - <value>text,csv,json,raw,rcfile,row,parquet,sequencefile,avro,hbase</value> + <value>text,json,raw,rcfile,row,parquet,sequencefile,avro,hbase</value> </property> <!--- Fragment Class Configurations --> @@ -48,10 +48,6 @@ <value>org.apache.tajo.storage.fragment.FileFragment</value> </property> <property> - <name>tajo.storage.fragment.csv.class</name> - <value>org.apache.tajo.storage.fragment.FileFragment</value> - </property> - <property> <name>tajo.storage.fragment.json.class</name> <value>org.apache.tajo.storage.fragment.FileFragment</value> </property> @@ -91,11 +87,6 @@ </property> <property> - <name>tajo.storage.scanner-handler.csv.class</name> - <value>org.apache.tajo.storage.CSVFile$CSVScanner</value> - </property> - - <property> <name>tajo.storage.scanner-handler.json.class</name> 
<value>org.apache.tajo.storage.text.DelimitedTextFile$DelimitedTextFileScanner</value> </property> @@ -138,7 +129,7 @@ <!--- Appender Handler --> <property> <name>tajo.storage.appender-handler</name> - <value>text,csv,raw,rcfile,row,parquet,sequencefile,avro,hbase</value> + <value>text,raw,rcfile,row,parquet,sequencefile,avro,hbase</value> </property> <property> @@ -147,11 +138,6 @@ </property> <property> - <name>tajo.storage.appender-handler.csv.class</name> - <value>org.apache.tajo.storage.CSVFile$CSVAppender</value> - </property> - - <property> <name>tajo.storage.appender-handler.json.class</name> <value>org.apache.tajo.storage.text.DelimitedTextFile$DelimitedTextFileAppender</value> </property> http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml b/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml index 6aa32fc..ba7f4e8 100644 --- a/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml +++ b/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml @@ -38,33 +38,33 @@ <!--- Registered Scanner Handler --> <property> <name>tajo.storage.scanner-handler</name> - <value>csv,raw,rcfile,row,trevni,parquet,sequencefile,avro</value> + <value>text,json,raw,rcfile,row,parquet,sequencefile,avro,hbase</value> </property> <!--- Fragment Class Configurations --> <property> - <name>tajo.storage.fragment.csv.class</name> + <name>tajo.storage.fragment.text.class</name> <value>org.apache.tajo.storage.fragment.FileFragment</value> </property> <property> - <name>tajo.storage.fragment.raw.class</name> + <name>tajo.storage.fragment.json.class</name> <value>org.apache.tajo.storage.fragment.FileFragment</value> </property> <property> - <name>tajo.storage.fragment.rcfile.class</name> + 
<name>tajo.storage.fragment.raw.class</name> <value>org.apache.tajo.storage.fragment.FileFragment</value> </property> <property> - <name>tajo.storage.fragment.row.class</name> + <name>tajo.storage.fragment.rcfile.class</name> <value>org.apache.tajo.storage.fragment.FileFragment</value> </property> <property> - <name>tajo.storage.fragment.trevni.class</name> + <name>tajo.storage.fragment.row.class</name> <value>org.apache.tajo.storage.fragment.FileFragment</value> </property> <property> <name>tajo.storage.fragment.parquet.class</name> - <value>org.apache.tajo.storage.FileFragment</value> + <value>org.apache.tajo.storage.fragment.FileFragment</value> </property> <property> <name>tajo.storage.fragment.sequencefile.class</name> @@ -74,11 +74,20 @@ <name>tajo.storage.fragment.avro.class</name> <value>org.apache.tajo.storage.fragment.FileFragment</value> </property> + <property> + <name>tajo.storage.fragment.hbase.class</name> + <value>org.apache.tajo.storage.hbase.HBaseFragment</value> + </property> <!--- Scanner Handler --> <property> - <name>tajo.storage.scanner-handler.csv.class</name> - <value>org.apache.tajo.storage.CSVFile$CSVScanner</value> + <name>tajo.storage.scanner-handler.text.class</name> + <value>org.apache.tajo.storage.text.DelimitedTextFile$DelimitedTextFileScanner</value> + </property> + + <property> + <name>tajo.storage.scanner-handler.json.class</name> + <value>org.apache.tajo.storage.text.DelimitedTextFile$DelimitedTextFileScanner</value> </property> <property> @@ -97,11 +106,6 @@ </property> <property> - <name>tajo.storage.scanner-handler.trevni.class</name> - <value>org.apache.tajo.storage.trevni.TrevniScanner</value> - </property> - - <property> <name>tajo.storage.scanner-handler.parquet.class</name> <value>org.apache.tajo.storage.parquet.ParquetScanner</value> </property> @@ -116,15 +120,25 @@ <value>org.apache.tajo.storage.avro.AvroScanner</value> </property> + <property> + <name>tajo.storage.scanner-handler.hbase.class</name> + 
<value>org.apache.tajo.storage.hbase.HBaseScanner</value> + </property> + <!--- Appender Handler --> <property> <name>tajo.storage.appender-handler</name> - <value>csv,raw,rcfile,row,trevni,parquet,sequencefile,avro</value> + <value>text,raw,rcfile,row,parquet,sequencefile,avro,hbase</value> + </property> + + <property> + <name>tajo.storage.appender-handler.text.class</name> + <value>org.apache.tajo.storage.text.DelimitedTextFile$DelimitedTextFileAppender</value> </property> <property> - <name>tajo.storage.appender-handler.csv.class</name> - <value>org.apache.tajo.storage.CSVFile$CSVAppender</value> + <name>tajo.storage.appender-handler.json.class</name> + <value>org.apache.tajo.storage.text.DelimitedTextFile$DelimitedTextFileAppender</value> </property> <property> @@ -143,11 +157,6 @@ </property> <property> - <name>tajo.storage.appender-handler.trevni.class</name> - <value>org.apache.tajo.storage.trevni.TrevniAppender</value> - </property> - - <property> <name>tajo.storage.appender-handler.parquet.class</name> <value>org.apache.tajo.storage.parquet.ParquetAppender</value> </property> @@ -162,6 +171,16 @@ <value>org.apache.tajo.storage.avro.AvroAppender</value> </property> + <property> + <name>tajo.storage.appender-handler.hbase.class</name> + <value>org.apache.tajo.storage.hbase.HFileAppender</value> + </property> + + <property> + <name>tajo.storage.appender-handler.hfile.class</name> + <value>org.apache.tajo.storage.hbase.HFileAppender</value> + </property> + <!--- Storage buffer --> <property> <name>tajo.storage.text.io.read-buffer.bytes</name> http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseFragment.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseFragment.java 
b/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseFragment.java index 5961751..2d3f2c3 100644 --- a/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseFragment.java +++ b/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseFragment.java @@ -23,11 +23,10 @@ import com.google.gson.annotations.Expose; import com.google.protobuf.ByteString; import com.google.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.tajo.catalog.CatalogUtil; +import org.apache.tajo.BuiltinStorages; import org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto; -import org.apache.tajo.catalog.proto.CatalogProtos.StoreType; import org.apache.tajo.storage.fragment.Fragment; -import org.apache.tajo.storage.hbase.StorageFragmentProtos.*; +import org.apache.tajo.storage.hbase.StorageFragmentProtos.HBaseFragmentProto; import java.net.URI; @@ -173,7 +172,7 @@ public class HBaseFragment implements Fragment, Comparable<HBaseFragment>, Clone FragmentProto.Builder fragmentBuilder = FragmentProto.newBuilder(); fragmentBuilder.setId(this.tableName); fragmentBuilder.setContents(builder.buildPartial().toByteString()); - fragmentBuilder.setStoreType(CatalogUtil.getStoreTypeString(StoreType.HBASE)); + fragmentBuilder.setStoreType(BuiltinStorages.HBASE); return fragmentBuilder.build(); } http://git-wip-us.apache.org/repos/asf/tajo/blob/6d852081/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/CSVFile.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/CSVFile.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/CSVFile.java deleted file mode 100644 index e55e34b..0000000 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/CSVFile.java +++ /dev/null @@ -1,571 +0,0 @@ -/** - * Licensed to the Apache 
Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tajo.storage; - -import io.netty.buffer.ByteBuf; -import org.apache.commons.lang.StringEscapeUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.compress.*; -import org.apache.tajo.TaskAttemptId; -import org.apache.tajo.catalog.Schema; -import org.apache.tajo.catalog.TableMeta; -import org.apache.tajo.catalog.proto.CatalogProtos; -import org.apache.tajo.catalog.statistics.TableStats; -import org.apache.tajo.conf.TajoConf; -import org.apache.tajo.datum.NullDatum; -import org.apache.tajo.exception.UnsupportedException; -import org.apache.tajo.plan.expr.EvalNode; -import org.apache.tajo.storage.compress.CodecPool; -import org.apache.tajo.storage.exception.AlreadyExistsStorageException; -import org.apache.tajo.storage.fragment.Fragment; -import org.apache.tajo.storage.rcfile.NonSyncByteArrayOutputStream; -import org.apache.tajo.storage.text.ByteBufLineReader; -import 
org.apache.tajo.storage.text.DelimitedTextFile;
import org.apache.tajo.storage.text.TextLineDeserializer;
import org.apache.tajo.util.Bytes;

import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;

/**
 * Appender/scanner pair for line-oriented CSV-style text files, with optional
 * Hadoop compression-codec support. Rows are delimited by {@link #LF}; fields
 * are separated by a configurable delimiter taken from the table meta.
 * <p>
 * NOTE(review): this file is being deleted in this commit in favor of
 * {@code DelimitedTextFile} (the scanner already delegates deserialization to
 * {@code DelimitedTextFile.getLineSerde(meta)}).
 */
public class CSVFile {

  /** Row (record) terminator byte written after every tuple. */
  public static final byte LF = '\n';

  private static final Log LOG = LogFactory.getLog(CSVFile.class);

  /**
   * Writes tuples as delimited text lines, buffering rows in memory and
   * flushing to a (possibly compressed) HDFS output stream.
   */
  public static class CSVAppender extends FileAppender {
    private final TableMeta meta;
    private final Schema schema;
    // Number of columns per row, fixed from the schema at construction time.
    private final int columnNum;
    private final FileSystem fs;
    private FSDataOutputStream fos;
    private DataOutputStream outputStream;
    // Non-null only when a compression codec is configured (see init()).
    private CompressionOutputStream deflateFilter;
    // Field separator bytes (UTF-8), unescaped from the table option.
    private byte[] delimiter;
    private TableStatistics stats = null;
    private Compressor compressor;
    private CompressionCodecFactory codecFactory;
    private CompressionCodec codec;
    // Output path with the codec's extension appended (compressed case only).
    private Path compressedPath;
    // Byte representation used for NULL field values.
    private byte[] nullChars;
    private int BUFFER_SIZE = 128 * 1024;
    // Bytes accumulated in `os` since the last flushBuffer().
    private int bufferedBytes = 0;
    // Logical write position: bytes of row data produced so far (pre-compression).
    private long pos = 0;
    // True when this appender writes an intermediate shuffle file; enables
    // per-field min/max analysis in addTuple().
    private boolean isShuffle;

    // In-memory row buffer; rows are staged here and flushed in bulk.
    private NonSyncByteArrayOutputStream os = new NonSyncByteArrayOutputStream(BUFFER_SIZE);
    private SerializerDeserializer serde;

    /**
     * @param conf          Hadoop/Tajo configuration
     * @param taskAttemptId owning task attempt (passed through to FileAppender)
     * @param schema        output schema
     * @param meta          table meta; supplies delimiter, null text, codec, serde options
     * @param workDir       directory whose filesystem is used for output
     * @throws IOException if the filesystem cannot be obtained
     */
    public CSVAppender(Configuration conf, final TaskAttemptId taskAttemptId,
                       final Schema schema, final TableMeta meta, final Path workDir) throws IOException {
      super(conf, taskAttemptId, schema, meta, workDir);
      this.fs = workDir.getFileSystem(conf);
      this.meta = meta;
      this.schema = schema;
      // Delimiter option may contain Java escapes (e.g. "\t", "|"); unescape first.
      this.delimiter = StringEscapeUtils.unescapeJava(
          this.meta.getOption(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER))
          .getBytes(Bytes.UTF8_CHARSET);

      this.columnNum = schema.size();

      String nullCharacters = StringEscapeUtils.unescapeJava(
          this.meta.getOption(StorageConstants.TEXT_NULL, NullDatum.DEFAULT_TEXT));

      // Empty option falls back to the default NULL text representation.
      if (StringUtils.isEmpty(nullCharacters)) {
        nullChars = NullDatum.get().asTextBytes();
      } else {
        nullChars = nullCharacters.getBytes(Bytes.UTF8_CHARSET);
      }
    }

    /**
     * Opens the output stream (compressed or plain), instantiates the serde,
     * and resets buffering state. Must be called before addTuple().
     *
     * @throws FileNotFoundException        if the parent directory does not exist
     * @throws AlreadyExistsStorageException if the target file already exists
     * @throws IOException                  if serde instantiation fails or I/O fails
     */
    @Override
    public void init() throws IOException {
      if (!fs.exists(path.getParent())) {
        throw new FileNotFoundException(path.getParent().toString());
      }

      //determine the intermediate file type
      String store = conf.get(TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.varname,
          TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.defaultVal);
      // Shuffle-statistics mode only when stats are enabled AND the cluster's
      // shuffle file format is CSV (i.e. this appender writes shuffle output).
      if (enabledStats && CatalogProtos.StoreType.CSV == CatalogProtos.StoreType.valueOf(store.toUpperCase())) {
        isShuffle = true;
      } else {
        isShuffle = false;
      }

      if(this.meta.containsOption(StorageConstants.COMPRESSION_CODEC)) {
        String codecName = this.meta.getOption(StorageConstants.COMPRESSION_CODEC);
        codecFactory = new CompressionCodecFactory(conf);
        codec = codecFactory.getCodecByClassName(codecName);
        compressor = CodecPool.getCompressor(codec);
        if(compressor != null) compressor.reset();  //builtin gzip is null

        // Compressed output gets the codec's extension (e.g. ".gz") appended.
        String extension = codec.getDefaultExtension();
        compressedPath = path.suffix(extension);

        if (fs.exists(compressedPath)) {
          throw new AlreadyExistsStorageException(compressedPath);
        }

        fos = fs.create(compressedPath);
        deflateFilter = codec.createOutputStream(fos, compressor);
        outputStream = new DataOutputStream(deflateFilter);

      } else {
        if (fs.exists(path)) {
          throw new AlreadyExistsStorageException(path);
        }
        fos = fs.create(path);
        outputStream = new DataOutputStream(new BufferedOutputStream(fos));
      }

      if (enabledStats) {
        this.stats = new TableStatistics(this.schema);
      }

      try {
        //It will be remove, because we will add custom serde in textfile
        String serdeClass = this.meta.getOption(StorageConstants.CSVFILE_SERDE,
            TextSerializerDeserializer.class.getName());
        serde = (SerializerDeserializer) Class.forName(serdeClass).newInstance();
        serde.init(schema);
      } catch (Exception e) {
        LOG.error(e.getMessage(), e);
        // Wrap reflection/instantiation failures as IOException for callers.
        throw new IOException(e);
      }

      os.reset();
      // Start the logical position at the physical stream position (normally 0
      // for a fresh file).
      pos = fos.getPos();
      bufferedBytes = 0;
      super.init();
    }

    /**
     * Serializes one tuple into the in-memory row buffer, separated by the
     * field delimiter and terminated by LF. Flushes to the output stream once
     * more than BUFFER_SIZE bytes have accumulated.
     *
     * @param tuple tuple to append; must match the schema's column count
     * @throws IOException on serialization or flush failure
     */
    @Override
    public void addTuple(Tuple tuple) throws IOException {
      int rowBytes = 0;

      for (int i = 0; i < columnNum; i++) {
        rowBytes += serde.serialize(i, tuple, os, nullChars);

        // Delimiter between fields only — not after the last one.
        if(columnNum - 1 > i){
          os.write(delimiter);
          rowBytes += delimiter.length;
        }
        if (isShuffle) {
          // it is to calculate min/max values, and it is only used for the intermediate file.
          stats.analyzeField(i, tuple);
        }
      }
      os.write(LF);
      rowBytes += 1;

      pos += rowBytes;
      bufferedBytes += rowBytes;
      if(bufferedBytes > BUFFER_SIZE){
        flushBuffer();
      }
      // Statistical section
      if (enabledStats) {
        stats.incrementRow();
      }
    }

    // Drains the in-memory row buffer into the (possibly compressed) output
    // stream and resets the buffered-byte counter. No-op when empty.
    private void flushBuffer() throws IOException {
      if(os.getLength() > 0) {
        os.writeTo(outputStream);
        os.reset();
        bufferedBytes = 0;
      }
    }

    /**
     * @return the logical number of row bytes written so far (pre-compression,
     *         including bytes still sitting in the in-memory buffer)
     */
    @Override
    public long getOffset() throws IOException {
      return pos;
    }

    /** Flushes the in-memory buffer and the underlying output stream. */
    @Override
    public void flush() throws IOException {
      flushBuffer();
      outputStream.flush();
    }

    /**
     * Flushes remaining data, finalizes the compression stream if any,
     * records byte statistics, and releases streams and the pooled compressor.
     */
    @Override
    public void close() throws IOException {

      try {
        flush();

        // Statistical section
        if (enabledStats) {
          stats.setNumBytes(getOffset());
        }

        if(deflateFilter != null) {
          // finish() writes the codec trailer; resetState() readies the codec
          // for potential reuse before the compressor returns to the pool.
          deflateFilter.finish();
          deflateFilter.resetState();
          deflateFilter = null;
        }
      } finally {
        IOUtils.cleanup(LOG, os, fos);
        if (compressor != null) {
          CodecPool.returnCompressor(compressor);
          compressor = null;
        }
      }
    }

    /**
     * @return collected table statistics, or {@code null} when stats are disabled
     */
    @Override
    public TableStats getStats() {
      if (enabledStats) {
        return stats.getTableStat();
      } else {
        return null;
      }
    }

    /** @return true when a compression codec (with a pooled compressor) is active */
    public boolean isCompress() {
      return compressor != null;
    }

    /** @return the codec's file extension (e.g. ".gz"), or "" when uncompressed */
    public String getExtension() {
      return codec != null ? codec.getDefaultExtension() : "";
    }
  }

  /**
   * Reads delimited text fragments page-by-page into an in-memory buffer and
   * deserializes rows on demand. Supports seeking on uncompressed files and
   * splitting on uncompressed or splittable-codec files.
   */
  public static class CSVScanner extends FileScanner implements SeekableScanner {
    /**
     * @param conf     configuration
     * @param schema   full table schema
     * @param meta     table meta (supplies the line serde options)
     * @param fragment the byte range of the file this scanner reads
     */
    public CSVScanner(Configuration conf, final Schema schema, final TableMeta meta, final Fragment fragment)
        throws IOException {
      super(conf, schema, meta, fragment);
      factory = new CompressionCodecFactory(conf);
      // Codec is inferred from the file extension; null means plain text.
      codec = factory.getCodec(this.fragment.getPath());
      if (codec == null || codec instanceof SplittableCompressionCodec) {
        splittable = true;
      }
    }

    // Target size of one in-memory page of raw row bytes.
    private final static int DEFAULT_PAGE_SIZE = 256 * 1024;
    private FileSystem fs;
    private FSDataInputStream fis;
    private InputStream is; //decompressd stream
    private CompressionCodecFactory factory;
    private CompressionCodec codec;
    private Decompressor decompressor;
    // Position source: the compressed stream when a codec is active, else fis.
    private Seekable filePosition;
    private boolean splittable = false;
    private long startOffset, end, pos;
    // currentIdx: next row to return within the page; validIdx: rows in page.
    private int currentIdx = 0, validIdx = 0, recordCount = 0;
    private int[] targetColumnIndexes;
    private boolean eof = false;
    private SplitLineReader reader;
    // Per-page parallel arrays: file offset, buffer offset, and length per row.
    private ArrayList<Long> fileOffsets;
    private ArrayList<Integer> rowLengthList;
    private ArrayList<Integer> startOffsets;
    private NonSyncByteArrayOutputStream buffer;
    // Reused output tuple — next() returns the same instance each call.
    private Tuple outTuple;
    private TextLineDeserializer deserializer;
    // Direct buffer reused per row for deserialization; released in close().
    private ByteBuf byteBuf = BufferPool.directBuffer(ByteBufLineReader.DEFAULT_BUFFER);

    /**
     * Opens the fragment, wires up (split-)compression input if a codec is
     * detected, builds the line deserializer, skips the partial first line of
     * a mid-file split, and reads the first page.
     */
    @Override
    public void init() throws IOException {
      fileOffsets = new ArrayList<Long>();
      rowLengthList = new ArrayList<Integer>();
      startOffsets = new ArrayList<Integer>();
      buffer = new NonSyncByteArrayOutputStream(DEFAULT_PAGE_SIZE);

      // FileFragment information
      // Null checks allow reset() to re-run init() without reopening the file.
      if(fs == null) {
        fs = FileScanner.getFileSystem((TajoConf)conf, fragment.getPath());
      }
      if(fis == null) fis = fs.open(fragment.getPath());

      recordCount = 0;
      pos = startOffset = fragment.getStartKey();
      end = startOffset + fragment.getLength();

      if (codec != null) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
          // Splittable codec: let the codec adjust the split to block bounds.
          SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
              fis, decompressor, startOffset, end,
              SplittableCompressionCodec.READ_MODE.BYBLOCK);

          reader = new CompressedSplitLineReader(cIn, conf, null);
          startOffset = cIn.getAdjustedStart();
          end = cIn.getAdjustedEnd();
          filePosition = cIn;
          is = cIn;
        } else {
          // Non-splittable codec: must decompress the whole stream from 0.
          is = new DataInputStream(codec.createInputStream(fis, decompressor));
          reader = new SplitLineReader(is, null);
          filePosition = fis;
        }
      } else {
        fis.seek(startOffset);
        filePosition = fis;
        is = fis;
        reader = new SplitLineReader(is, null);
      }

      // No projection requested means all columns are targets.
      if (targets == null) {
        targets = schema.toArray();
      }

      outTuple = new VTuple(targets.length);
      deserializer = DelimitedTextFile.getLineSerde(meta).createDeserializer(schema, meta, targets);
      deserializer.init();

      // NOTE(review): targetColumnIndexes is computed and sorted but projection
      // appears to be handled by the deserializer; presumably kept for
      // compatibility — confirm before relying on it.
      targetColumnIndexes = new int[targets.length];
      for (int i = 0; i < targets.length; i++) {
        targetColumnIndexes[i] = schema.getColumnId(targets[i].getQualifiedName());
      }

      super.init();
      Arrays.sort(targetColumnIndexes);
      if (LOG.isDebugEnabled()) {
        LOG.debug("CSVScanner open:" + fragment.getPath() + "," + startOffset + "," + end +
            "," + fs.getFileStatus(fragment.getPath()).getLen());
      }

      // A split starting mid-file begins mid-line: discard the partial line
      // (the previous split reads it via needAdditionalRecordAfterSplit()).
      if (startOffset != 0) {
        pos += reader.readLine(new Text(), 0, maxBytesToConsume(pos));
      }
      eof = false;
      page();
    }

    // Cap on bytes a single readLine may consume: unlimited when compressed,
    // otherwise bounded by the bytes remaining in this split.
    private int maxBytesToConsume(long pos) {
      return isCompress() ? Integer.MAX_VALUE : (int) Math.min(Integer.MAX_VALUE, end - pos);
    }

    // Bytes remaining in this fragment from the current position.
    private long fragmentable() throws IOException {
      return end - getFilePosition();
    }

    // Current position: the compressed stream's physical position when a codec
    // is active, else the logical byte position tracked in `pos`.
    private long getFilePosition() throws IOException {
      long retVal;
      if (isCompress()) {
        retVal = filePosition.getPos();
      } else {
        retVal = pos;
      }
      return retVal;
    }

    /**
     * Reads the next page of rows into {@code buffer}, recording each row's
     * file offset, buffer offset, and length. Sets {@code eof} once the reader
     * passes the split end and no extra record is owed to this split.
     */
    private void page() throws IOException {
      // Index initialization
      currentIdx = 0;
      validIdx = 0;
      int currentBufferPos = 0;
      int bufferedSize = 0;

      buffer.reset();
      startOffsets.clear();
      rowLengthList.clear();
      fileOffsets.clear();

      if(eof) {
        return;
      }

      while (DEFAULT_PAGE_SIZE >= bufferedSize){

        // ret is the number of bytes consumed (including the newline); 0 = EOF.
        int ret = reader.readDefaultLine(buffer, rowLengthList, Integer.MAX_VALUE, Integer.MAX_VALUE);

        if(ret == 0){
          break;
        } else {
          fileOffsets.add(pos);
          pos += ret;
          startOffsets.add(currentBufferPos);
          // readDefaultLine appended this row's length to rowLengthList.
          currentBufferPos += rowLengthList.get(rowLengthList.size() - 1);
          bufferedSize += ret;
          validIdx++;
          recordCount++;
        }

        // Past the split end: stop unless the reader still owes us the record
        // that straddles the split boundary.
        if(getFilePosition() > end && !reader.needAdditionalRecordAfterSplit()){
          eof = true;
          break;
        }
      }
      if (tableStats != null) {
        tableStats.setReadBytes(pos - startOffset);
        tableStats.setNumRows(recordCount);
      }
    }

    /**
     * @return scan progress in [0, 1]; 1.0 at EOF, 0.0 on error or before any read
     */
    @Override
    public float getProgress() {
      try {
        if(eof) {
          return 1.0f;
        }
        long filePos = getFilePosition();
        if (startOffset == filePos) {
          return 0.0f;
        } else {
          long readBytes = filePos - startOffset;
          long remainingBytes = Math.max(end - filePos, 0);
          return Math.min(1.0f, (float)(readBytes) / (float)(readBytes + remainingBytes));
        }
      } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        return 0.0f;
      }
    }

    /**
     * Returns the next tuple, reading a new page when the current one is
     * exhausted. The returned Tuple instance is REUSED across calls.
     *
     * @return next tuple, or {@code null} at end of the fragment
     * @throws IOException wrapping any deserialization failure
     */
    @Override
    public Tuple next() throws IOException {
      try {
        if (currentIdx == validIdx) {
          if (eof) {
            return null;
          } else {
            page();

            // Empty page after a refill means the fragment is exhausted.
            if(currentIdx == validIdx){
              return null;
            }
          }
        }

        byteBuf.clear();
        byteBuf.writeBytes(buffer.getData(), startOffsets.get(currentIdx), rowLengthList.get(currentIdx));

        deserializer.deserialize(byteBuf, outTuple);

        currentIdx++;
        return outTuple;
      } catch (Throwable t) {
        LOG.error("Tuple list length: " + (fileOffsets != null ? fileOffsets.size() : 0), t);
        LOG.error("Tuple list current index: " + currentIdx, t);
        throw new IOException(t);
      }
    }

    // True when this fragment is read through a compression codec.
    private boolean isCompress() {
      return codec != null;
    }

    /**
     * Restarts the scan from the beginning of the fragment by re-running
     * init() (the open file handles are reused — see the null checks there).
     */
    @Override
    public void reset() throws IOException {
      if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor);
        decompressor = null;
      }

      init();
    }

    /**
     * Records final read statistics and releases the reader, streams,
     * pooled decompressor, and the direct ByteBuf.
     */
    @Override
    public void close() throws IOException {
      try {
        if (tableStats != null) {
          tableStats.setReadBytes(pos - startOffset);  //Actual Processed Bytes. (decompressed bytes + overhead)
          tableStats.setNumRows(recordCount);
        }

        IOUtils.cleanup(LOG, reader, is, fis);
        fs = null;
        is = null;
        fis = null;
        if (LOG.isDebugEnabled()) {
          LOG.debug("CSVScanner processed record:" + recordCount);
        }
      } finally {
        if (decompressor != null) {
          CodecPool.returnDecompressor(decompressor);
          decompressor = null;
        }
        outTuple = null;
        // Guard against double-release of the reference-counted direct buffer.
        if (this.byteBuf.refCnt() > 0) {
          this.byteBuf.release();
        }
      }
    }

    /** Column projection is supported (delegated to the line deserializer). */
    @Override
    public boolean isProjectable() {
      return true;
    }

    /** Predicate push-down is not supported by this scanner. */
    @Override
    public boolean isSelectable() {
      return false;
    }

    @Override
    public void setFilter(EvalNode filter) {
      // Filter push-down unsupported — see isSelectable().
      throw new UnsupportedException();
    }

    /**
     * Positions the scanner at the given file offset. Only valid for
     * uncompressed files. Offsets matching a buffered row reposition within
     * the current page; otherwise the underlying stream is re-seeked and the
     * page state cleared (the next read repages from there).
     *
     * @param offset absolute file offset of a row start
     * @throws UnsupportedException when the file is compressed
     * @throws IOException          when the offset is outside the fragment
     */
    @Override
    public void seek(long offset) throws IOException {
      if(isCompress()) throw new UnsupportedException();

      // fileOffsets is sorted ascending (filled in file order by page()).
      int tupleIndex = Arrays.binarySearch(fileOffsets.toArray(), offset);

      if (tupleIndex > -1) {
        this.currentIdx = tupleIndex;
      } else if (isSplittable() && end >= offset || startOffset <= offset) {
        // NOTE(review): precedence here is (isSplittable() && end >= offset)
        // || (startOffset <= offset) — presumably a bounds check on both ends
        // was intended; confirm against callers before changing.
        eof = false;
        fis.seek(offset);
        pos = offset;
        reader.reset();
        this.currentIdx = 0;
        this.validIdx = 0;
        // pageBuffer();
      } else {
        throw new IOException("invalid offset " +
            " < start : " + startOffset + " , " +
            " end : " + end + " , " +
            " filePos : " + filePosition.getPos() + " , " +
            " input offset : " + offset + " >");
      }
    }

    /**
     * @return the file offset of the next tuple to be returned, or -1 when the
     *         fragment is exhausted
     * @throws UnsupportedException when the file is compressed
     */
    @Override
    public long getNextOffset() throws IOException {
      if(isCompress()) throw new UnsupportedException();

      if (this.currentIdx == this.validIdx) {
        if (fragmentable() <= 0) {
          return -1;
        } else {
          page();
          if(currentIdx == validIdx) return -1;
        }
      }
      return fileOffsets.get(currentIdx);
    }

    /** @return true for plain text or splittable-codec files */
    @Override
    public boolean isSplittable(){
      return splittable;
    }
  }
}
