[
https://issues.apache.org/jira/browse/HIVE-26929?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Dharmik Thakkar updated HIVE-26929:
-----------------------------------
Description:
Allow creating Iceberg tables without a column definition when the
'metadata_location' table property is set.
Iceberg supports pointing to an external metadata.json file to infer the table
schema. Irrespective of the schema defined as part of the create table
statement, the metadata.json is used to create the table. We should allow
creating a table without a column definition when metadata_location is defined
in tblproperties.
{code:java}
create table test_meta (id int, name string, cgpa decimal) stored by iceberg
stored as orc;
describe formatted test_meta;
create table test_meta_copy(id int) stored by iceberg
tblproperties('metadata_location'='s3a://s3bucket/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
As a result of the above SQL, we get test_meta_copy with the same schema as
test_meta, irrespective of the columns specified in the create table statement.
|{color:#000000}*col_name*{color}|{color:#000000}*data_type*{color}|
|{color:#000000}*id*{color}|{color:#000000}int{color}|
|{color:#000000}*name*{color}|{color:#000000}string{color}|
|{color:#000000}*cgpa*{color}|{color:#000000}decimal(10,0){color}|
| |{color:#000000}NULL{color}|
|{color:#000000}*# Detailed Table
Information*{color}|{color:#000000}NULL{color}|
|{color:#000000}*Database:*
{color}|{color:#000000}iceberg_test_db_hive{color}|
|{color:#000000}*OwnerType: * {color}|{color:#000000}USER
{color}|
|{color:#000000}*Owner: * {color}|{color:#000000}hive
{color}|
|{color:#000000}*CreateTime:* {color}|{color:#000000}Tue Jan 10
21:49:08 UTC 2023{color}|
|{color:#000000}*LastAccessTime:* {color}|{color:#000000}Fri Dec 12
21:41:41 UTC 1969{color}|
|{color:#000000}*Retention: * {color}|{color:#000000}2147483647{color}|
|{color:#000000}*Location:*
{color}|{color:#000000}+s3a://s3bucket/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta+{color}|
|{color:#000000}*Table Type:* {color}|{color:#000000}EXTERNAL_TABLE
{color}|
|{color:#000000}*Table Parameters:*{color}|{color:#000000}NULL{color}|
| |{color:#000000}EXTERNAL {color}|
| |{color:#000000}bucketing_version {color}|
| |{color:#000000}engine.hive.enabled{color}|
| |{color:#000000}metadata_location {color}|
| |{color:#000000}numFiles {color}|
| |{color:#000000}numRows {color}|
| |{color:#000000}rawDataSize {color}|
| |{color:#000000}serialization.format{color}|
| |{color:#000000}storage_handler {color}|
| |{color:#000000}table_type {color}|
| |{color:#000000}totalSize {color}|
| |{color:#000000}transient_lastDdlTime{color}|
| |{color:#000000}uuid {color}|
| |{color:#000000}write.format.default{color}|
| |{color:#000000}NULL{color}|
|{color:#000000}*# Storage Information*{color}|{color:#000000}NULL{color}|
|{color:#000000}*SerDe Library: *
{color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergSerDe{color}|
|{color:#000000}*InputFormat: *
{color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergInputFormat{color}|
|{color:#000000}*OutputFormat:*
{color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergOutputFormat{color}|
|{color:#000000}*Compressed:* {color}|{color:#000000}No
{color}|
|{color:#000000}*Sort Columns:* {color}|{color:#000000}[]
{color}|
However, if we skip passing the column definition, the query fails.
{code:java}
create table test_meta_copy2 stored by iceberg
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
Error:
{code:java}
INFO : Compiling
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19):
create table test_meta_copy2 stored by iceberg
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
INFO : Semantic Analysis Completed (retrial = false)
INFO : Created Hive schema: Schema(fieldSchemas:null, properties:null)
INFO : Completed compiling
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19); Time
taken: 0.019 seconds
INFO : Executing
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19):
create table test_meta_copy2 stored by iceberg
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
INFO : Starting task [Stage-0:DDL] in serial mode
ERROR : Failed
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
~[?:?]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
~[?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
... 28 more
Caused by: org.apache.hadoop.hive.metastore.api.MetaException:
org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table
or a valid schema
at
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
... 28 more
ERROR : DDLTask failed, DDL Operation: class
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
~[?:?]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
~[?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
... 28 more
Caused by: org.apache.hadoop.hive.metastore.api.MetaException:
org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table
or a valid schema
at
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
... 28 more
ERROR : FAILED: Execution Error, return code 40000 from
org.apache.hadoop.hive.ql.ddl.DDLTask. java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
INFO : Completed executing
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19); Time
taken: 0.033 seconds
INFO : OK {code}
was:
Allow creating iceberg tables without column definition when
'metadata_location' tblproperties is set.
Iceberg supports pointing to external metadata.json file to infer table schema.
Irrespective of the schema defined as part of create table statement the
metadata.json is used to create table. We should allow creating table without
column definition in case the metadata_location is defined in tblproperties.
{code:java}
create table test_meta (id int, name string, cgpa decimal) stored by iceberg
stored as orc;
describe formatted test_meta;
create table test_meta_copy(id int) stored by iceberg
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
As a result of above SQL we get test_meta_copy with same schema as test_meta
irrespective of the columns specified in create table statement.
|{color:#000000}*col_name*{color}|{color:#000000}*data_type*{color}|
|{color:#000000}*id*{color}|{color:#000000}int{color}|
|{color:#000000}*name*{color}|{color:#000000}string{color}|
|{color:#000000}*cgpa*{color}|{color:#000000}decimal(10,0){color}|
| |{color:#000000}NULL{color}|
|{color:#000000}*# Detailed Table
Information*{color}|{color:#000000}NULL{color}|
|{color:#000000}*Database:*
{color}|{color:#000000}iceberg_test_db_hive{color}|
|{color:#000000}*OwnerType: * {color}|{color:#000000}USER
{color}|
|{color:#000000}*Owner: * {color}|{color:#000000}hive
{color}|
|{color:#000000}*CreateTime:* {color}|{color:#000000}Tue Jan 10
21:49:08 UTC 2023{color}|
|{color:#000000}*LastAccessTime:* {color}|{color:#000000}Fri Dec 12
21:41:41 UTC 1969{color}|
|{color:#000000}*Retention: * {color}|{color:#000000}2147483647{color}|
|{color:#000000}*Location:*
{color}|{color:#000000}+s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta+{color}|
|{color:#000000}*Table Type:* {color}|{color:#000000}EXTERNAL_TABLE
{color}|
|{color:#000000}*Table Parameters:*{color}|{color:#000000}NULL{color}|
| |{color:#000000}EXTERNAL {color}|
| |{color:#000000}bucketing_version {color}|
| |{color:#000000}engine.hive.enabled{color}|
| |{color:#000000}metadata_location {color}|
| |{color:#000000}numFiles {color}|
| |{color:#000000}numRows {color}|
| |{color:#000000}rawDataSize {color}|
| |{color:#000000}serialization.format{color}|
| |{color:#000000}storage_handler {color}|
| |{color:#000000}table_type {color}|
| |{color:#000000}totalSize {color}|
| |{color:#000000}transient_lastDdlTime{color}|
| |{color:#000000}uuid {color}|
| |{color:#000000}write.format.default{color}|
| |{color:#000000}NULL{color}|
|{color:#000000}*# Storage Information*{color}|{color:#000000}NULL{color}|
|{color:#000000}*SerDe Library: *
{color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergSerDe{color}|
|{color:#000000}*InputFormat: *
{color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergInputFormat{color}|
|{color:#000000}*OutputFormat:*
{color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergOutputFormat{color}|
|{color:#000000}*Compressed:* {color}|{color:#000000}No
{color}|
|{color:#000000}*Sort Columns:* {color}|{color:#000000}[]
{color}|
However if we skip passing column definition the query fails
{code:java}
create table test_meta_copy2 stored by iceberg
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
error
{code:java}
INFO : Compiling
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19):
create table test_meta_copy2 stored by iceberg
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
INFO : Semantic Analysis Completed (retrial = false)
INFO : Created Hive schema: Schema(fieldSchemas:null, properties:null)
INFO : Completed compiling
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19); Time
taken: 0.019 seconds
INFO : Executing
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19):
create table test_meta_copy2 stored by iceberg
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
INFO : Starting task [Stage-0:DDL] in serial mode
ERROR : Failed
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
~[?:?]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
~[?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
... 28 more
Caused by: org.apache.hadoop.hive.metastore.api.MetaException:
org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table
or a valid schema
at
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
... 28 more
ERROR : DDLTask failed, DDL Operation: class
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
~[?:?]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
~[?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
... 28 more
Caused by: org.apache.hadoop.hive.metastore.api.MetaException:
org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table
or a valid schema
at
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
... 28 more
ERROR : FAILED: Execution Error, return code 40000 from
org.apache.hadoop.hive.ql.ddl.DDLTask. java.lang.RuntimeException:
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
provide an existing table or a valid schema)
INFO : Completed executing
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19); Time
taken: 0.033 seconds
INFO : OK {code}
> Allow creating iceberg tables without column definition when
> 'metadata_location' tblproperties is set.
> ------------------------------------------------------------------------------------------------------
>
> Key: HIVE-26929
> URL: https://issues.apache.org/jira/browse/HIVE-26929
> Project: Hive
> Issue Type: Improvement
> Components: Iceberg integration
> Reporter: Dharmik Thakkar
> Priority: Minor
>
> Allow creating Iceberg tables without a column definition when the
> 'metadata_location' table property is set.
> Iceberg supports pointing to an external metadata.json file to infer the
> table schema. Irrespective of the schema defined as part of the CREATE TABLE
> statement, the metadata.json file is used to create the table. We should
> therefore allow creating a table without a column definition when
> 'metadata_location' is defined in TBLPROPERTIES.
> {code:java}
> create table test_meta (id int, name string, cgpa decimal) stored by iceberg
> stored as orc;
> describe formatted test_meta;
> create table test_meta_copy(id int) stored by iceberg
> tblproperties('metadata_location'='s3a://s3bucket/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
> As a result of the above SQL, test_meta_copy is created with the same schema
> as test_meta, irrespective of the columns specified in the CREATE TABLE
> statement.
> |{color:#000000}*col_name*{color}|{color:#000000}*data_type*{color}|
> |{color:#000000}*id*{color}|{color:#000000}int{color}|
> |{color:#000000}*name*{color}|{color:#000000}string{color}|
> |{color:#000000}*cgpa*{color}|{color:#000000}decimal(10,0){color}|
> | |{color:#000000}NULL{color}|
> |{color:#000000}*# Detailed Table
> Information*{color}|{color:#000000}NULL{color}|
> |{color:#000000}*Database:*
> {color}|{color:#000000}iceberg_test_db_hive{color}|
> |{color:#000000}*OwnerType: * {color}|{color:#000000}USER
> {color}|
> |{color:#000000}*Owner: * {color}|{color:#000000}hive
> {color}|
> |{color:#000000}*CreateTime:* {color}|{color:#000000}Tue Jan 10
> 21:49:08 UTC 2023{color}|
> |{color:#000000}*LastAccessTime:* {color}|{color:#000000}Fri Dec 12
> 21:41:41 UTC 1969{color}|
> |{color:#000000}*Retention: *
> {color}|{color:#000000}2147483647{color}|
> |{color:#000000}*Location:*
> {color}|{color:#000000}+s3a://s3bucket/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta+{color}|
> |{color:#000000}*Table Type:* {color}|{color:#000000}EXTERNAL_TABLE
> {color}|
> |{color:#000000}*Table Parameters:*{color}|{color:#000000}NULL{color}|
> | |{color:#000000}EXTERNAL {color}|
> | |{color:#000000}bucketing_version {color}|
> | |{color:#000000}engine.hive.enabled{color}|
> | |{color:#000000}metadata_location {color}|
> | |{color:#000000}numFiles {color}|
> | |{color:#000000}numRows {color}|
> | |{color:#000000}rawDataSize {color}|
> | |{color:#000000}serialization.format{color}|
> | |{color:#000000}storage_handler {color}|
> | |{color:#000000}table_type {color}|
> | |{color:#000000}totalSize {color}|
> | |{color:#000000}transient_lastDdlTime{color}|
> | |{color:#000000}uuid {color}|
> | |{color:#000000}write.format.default{color}|
> | |{color:#000000}NULL{color}|
> |{color:#000000}*# Storage Information*{color}|{color:#000000}NULL{color}|
> |{color:#000000}*SerDe Library: *
> {color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergSerDe{color}|
> |{color:#000000}*InputFormat: *
> {color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergInputFormat{color}|
> |{color:#000000}*OutputFormat:*
> {color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergOutputFormat{color}|
> |{color:#000000}*Compressed:* {color}|{color:#000000}No
> {color}|
> |{color:#000000}*Sort Columns:* {color}|{color:#000000}[]
> {color}|
> However, if we skip the column definition entirely, the query fails:
> {code:java}
> create table test_meta_copy2 stored by iceberg
> tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
> Error:
> {code:java}
> INFO : Compiling
> command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19):
> create table test_meta_copy2 stored by iceberg
> tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
> INFO : Semantic Analysis Completed (retrial = false)
> INFO : Created Hive schema: Schema(fieldSchemas:null, properties:null)
> INFO : Completed compiling
> command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19);
> Time taken: 0.019 seconds
> INFO : Executing
> command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19):
> create table test_meta_copy2 stored by iceberg
> tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
> INFO : Starting task [Stage-0:DDL] in serial mode
> ERROR : Failed
> org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException:
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
> provide an existing table or a valid schema)
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
> ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
> ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
> ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
> at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
> ~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
> at
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
> ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
> at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
> at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> ~[?:?]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> ~[?:?]
> at java.lang.Thread.run(Thread.java:829) [?:?]
> Caused by: java.lang.RuntimeException:
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
> provide an existing table or a valid schema)
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> ... 28 more
> Caused by: org.apache.hadoop.hive.metastore.api.MetaException:
> org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table
> or a valid schema
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> ... 28 more
> ERROR : DDLTask failed, DDL Operation: class
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation
> org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException:
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
> provide an existing table or a valid schema)
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
> ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
> ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
> ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
> at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
> ~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
> at
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
> ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
> at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
> at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> ~[?:?]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> ~[?:?]
> at java.lang.Thread.run(Thread.java:829) [?:?]
> Caused by: java.lang.RuntimeException:
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
> provide an existing table or a valid schema)
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> ... 28 more
> Caused by: org.apache.hadoop.hive.metastore.api.MetaException:
> org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table
> or a valid schema
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307)
> ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
> ... 28 more
> ERROR : FAILED: Execution Error, return code 40000 from
> org.apache.hadoop.hive.ql.ddl.DDLTask. java.lang.RuntimeException:
> MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please
> provide an existing table or a valid schema)
> INFO : Completed executing
> command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19);
> Time taken: 0.033 seconds
> INFO : OK {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)