Markus Kemper created SQOOP-3077:
------------------------------------
Summary: Add support for (import + --hcatalog + --as-avrodatafile)
with RDBMS type TIMESTAMP
Key: SQOOP-3077
URL: https://issues.apache.org/jira/browse/SQOOP-3077
Project: Sqoop
Issue Type: Improvement
Reporter: Markus Kemper
Please consider adding support for --hcatalog import with TIMESTAMP columns; the Avro
specification suggests that Logical Types support timestamps.
Avro Doc:
https://avro.apache.org/docs/1.8.1/spec.html#Logical+Types
{noformat}
#################
# STEP 01 - Setup Table and Data
#################
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query
"drop table t1_dates"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query
"create table t1_dates (c1_int integer, c2_date date, c3_timestamp timestamp)"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query
"select dbms_metadata.get_ddl('TABLE', 'T1_DATES', 'SQOOP') from dual"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query
"insert into t1_dates values (1, current_date, current_timestamp)"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query
"select * from t1_dates"
Output:
------------------------
| DBMS_METADATA.GET_DDL('TABLE','T1_DATES','SQOOP') |
------------------------
|
CREATE TABLE "SQOOP"."T1_DATES"
( "C1_INT" NUMBER(*,0),
"C2_DATE" DATE,
"C3_TIMESTAMP" TIMESTAMP (6)
) SEGMENT CREATION DEFERRED
PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255
NOCOMPRESS LOGGING
TABLESPACE "SQOOP" |
------------------------
---
------------------------------------------------
| C1_INT | C2_DATE | C3_TIMESTAMP |
------------------------------------------------
| 1 | 2016-12-10 15:48:23.0 | 2016-12-10 15:48:23.707327 |
------------------------------------------------
#################
# STEP 02 - Import with Text Format
#################
beeline -u jdbc:hive2:// -e "use default; drop table t1_dates_text;"
sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --table
T1_DATES --hcatalog-database default --hcatalog-table t1_dates_text
--create-hcatalog-table --hcatalog-storage-stanza 'stored as textfile'
--num-mappers 1 --map-column-hive c2_date=date,c3_timestamp=timestamp
beeline -u jdbc:hive2:// -e "use default; describe t1_dates_text; select * from
t1_dates_text;"
+-----------------------------------------------------------------+--+
| createtab_stmt |
+-----------------------------------------------------------------+--+
| CREATE TABLE `t1_dates_text`( |
| `c1_int` decimal(38,0), |
| `c2_date` date, |
| `c3_timestamp` timestamp) |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION |
| 'hdfs://nameservice1/user/hive/warehouse/t1_dates_text' |
| TBLPROPERTIES ( |
| 'transient_lastDdlTime'='1481386391') |
+-----------------------------------------------------------------+--+
--
+-----------------------+------------------------+-----------------------------+--+
| t1_dates_text.c1_int | t1_dates_text.c2_date | t1_dates_text.c3_timestamp |
+-----------------------+------------------------+-----------------------------+--+
| 1 | 2016-12-10 | 2016-12-10 15:48:23.707327 |
+-----------------------+------------------------+-----------------------------+--+
#################
# STEP 03 - Import with Avro Format (default)
#################
beeline -u jdbc:hive2:// -e "use default; drop table t1_dates_avro;"
sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --table
T1_DATES --hcatalog-database default --hcatalog-table t1_dates_avro
--create-hcatalog-table --hcatalog-storage-stanza 'stored as avro'
--num-mappers 1
beeline -u jdbc:hive2:// -e "use default; show create table t1_dates_avro;
select * from t1_dates_avro;"
+------------------------------------------------------------------+--+
| createtab_stmt |
+------------------------------------------------------------------+--+
| CREATE TABLE `t1_dates_avro`( |
| `c1_int` decimal(38,0) COMMENT '', |
| `c2_date` string COMMENT '', |
| `c3_timestamp` string COMMENT '') |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' |
| LOCATION |
| 'hdfs://nameservice1/user/hive/warehouse/t1_dates_avro' |
| TBLPROPERTIES ( |
| 'transient_lastDdlTime'='1481390657') |
+------------------------------------------------------------------+--+
--
+-----------------------+------------------------+-----------------------------+--+
| t1_dates_avro.c1_int | t1_dates_avro.c2_date | t1_dates_avro.c3_timestamp |
+-----------------------+------------------------+-----------------------------+--+
| 1 | 2016-12-10 15:48:23.0 | 2016-12-10 15:48:23.707327 |
+-----------------------+------------------------+-----------------------------+--+
#################
# STEP 04 - Import with Avro Format (--map-column-hive c2_date=date)
#################
beeline -u jdbc:hive2:// -e "use default; drop table t1_dates_avro;"
sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --table
T1_DATES --hcatalog-database default --hcatalog-table t1_dates_avro
--create-hcatalog-table --hcatalog-storage-stanza 'stored as avro'
--num-mappers 1 --columns c1_int,c2_date --map-column-hive c2_date=date
beeline -u jdbc:hive2:// -e "use default; show create table t1_dates_avro;
select * from t1_dates_avro;"
+------------------------------------------------------------------+--+
| createtab_stmt |
+------------------------------------------------------------------+--+
| CREATE TABLE `t1_dates_avro`( |
| `c1_int` decimal(38,0) COMMENT '', |
| `c2_date` date COMMENT '') |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' |
| LOCATION |
| 'hdfs://nameservice1/user/hive/warehouse/t1_dates_avro' |
| TBLPROPERTIES ( |
| 'transient_lastDdlTime'='1481390814') |
+------------------------------------------------------------------+--+
---
+-----------------------+------------------------+--+
| t1_dates_avro.c1_int | t1_dates_avro.c2_date |
+-----------------------+------------------------+--+
| 1 | 2016-12-10 |
+-----------------------+------------------------+--+
#################
# STEP 05 - Import with Avro Format (--map-column-hive c3_timestamp=timestamp)
#################
beeline -u jdbc:hive2:// -e "use default; drop table t1_dates_avro;"
sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --table
T1_DATES --hcatalog-database default --hcatalog-table t1_dates_avro
--create-hcatalog-table --hcatalog-storage-stanza 'stored as avro'
--num-mappers 1 --columns c1_int,c3_timestamp --map-column-hive
c3_timestamp=timestamp
beeline -u jdbc:hive2:// -e "use default; show create table t1_dates_avro;
select * from t1_dates_avro;"
Output:
16/12/10 09:28:52 INFO hcat.SqoopHCatUtilities: Database column name - info map
:
c1_int : [Type : 2,Precision : 38,Scale : 0]
c3_timestamp : [Type : 93,Precision : 0,Scale : 6]
16/12/10 09:28:52 INFO hcat.SqoopHCatUtilities: Creating HCatalog table
default.t1_dates_avro for import
16/12/10 09:28:52 INFO hcat.SqoopHCatUtilities: HCatalog Create table
statement:
create table `default`.`t1_dates_avro` (
`c1_int` decimal(38),
`c3_timestamp` timestamp)
stored as avro
16/12/10 09:28:52 INFO hcat.SqoopHCatUtilities: Executing external HCatalog CLI
process with args :-f,/tmp/hcat-script-1481390932995
16/12/10 09:28:57 INFO hcat.SqoopHCatUtilities: FAILED: Execution Error, return
code 1 from org.apache.hadoop.hive.ql.exec.DDLTask.
java.lang.UnsupportedOperationException: timestamp is not supported.
16/12/10 09:28:57 ERROR tool.ImportTool: Encountered IOException running import
job: java.io.IOException: HCat exited with status 1
at
org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.executeExternalHCatProgram(SqoopHCatUtilities.java:1148)
at
org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.launchHCatCli(SqoopHCatUtilities.java:1097)
at
org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.createHCatTable(SqoopHCatUtilities.java:644)
at
org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.configureHCat(SqoopHCatUtilities.java:340)
at
org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.configureImportOutputFormat(SqoopHCatUtilities.java:802)
at
org.apache.sqoop.mapreduce.ImportJobBase.configureOutputFormat(ImportJobBase.java:98)
at
org.apache.sqoop.mapreduce.ImportJobBase.runImport(ImportJobBase.java:259)
at org.apache.sqoop.manager.SqlManager.importTable(SqlManager.java:692)
at
org.apache.sqoop.manager.OracleManager.importTable(OracleManager.java:444)
at org.apache.sqoop.tool.ImportTool.importTable(ImportTool.java:507)
at org.apache.sqoop.tool.ImportTool.run(ImportTool.java:615)
at org.apache.sqoop.Sqoop.run(Sqoop.java:143)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.sqoop.Sqoop.runSqoop(Sqoop.java:179)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:218)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:227)
at org.apache.sqoop.Sqoop.main(Sqoop.java:236)
{noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)