Dharmik Thakkar created HIVE-26929:
--------------------------------------

             Summary: Allow creating iceberg tables without column definition 
when 'metadata_location' tblproperties is set.
                 Key: HIVE-26929
                 URL: https://issues.apache.org/jira/browse/HIVE-26929
             Project: Hive
          Issue Type: Improvement
          Components: Iceberg integration
            Reporter: Dharmik Thakkar


Allow creating Iceberg tables without a column definition when the
'metadata_location' table property is set.

Iceberg supports pointing to an external metadata.json file to infer the table
schema. Irrespective of the schema defined in the create table statement, the
metadata.json is used to create the table. We should allow creating a table
without a column definition when metadata_location is defined in tblproperties.
{code:java}
create table test_meta (id int, name string, cgpa decimal) stored by iceberg 
stored as orc;
describe formatted test_meta;
create table test_meta_copy(id int) stored by iceberg 
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
As a result of the above SQL, we get test_meta_copy with the same schema as
test_meta, irrespective of the columns specified in the create table statement:
|{color:#000000}*col_name*{color}|{color:#000000}*data_type*{color}|
|{color:#000000}*id*{color}|{color:#000000}int{color}|
|{color:#000000}*name*{color}|{color:#000000}string{color}|
|{color:#000000}*cgpa*{color}|{color:#000000}decimal(10,0){color}|
| |{color:#000000}NULL{color}|
|{color:#000000}*# Detailed Table 
Information*{color}|{color:#000000}NULL{color}|
|{color:#000000}*Database:*           
{color}|{color:#000000}iceberg_test_db_hive{color}|
|{color:#000000}*OwnerType: *         {color}|{color:#000000}USER               
 {color}|
|{color:#000000}*Owner: *             {color}|{color:#000000}hive               
 {color}|
|{color:#000000}*CreateTime:*         {color}|{color:#000000}Tue Jan 10 
21:49:08 UTC 2023{color}|
|{color:#000000}*LastAccessTime:*     {color}|{color:#000000}Fri Dec 12 
21:41:41 UTC 1969{color}|
|{color:#000000}*Retention: *         {color}|{color:#000000}2147483647{color}|
|{color:#000000}*Location:*           
{color}|{color:#000000}+s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta+{color}|
|{color:#000000}*Table Type:*         {color}|{color:#000000}EXTERNAL_TABLE     
 {color}|
|{color:#000000}*Table Parameters:*{color}|{color:#000000}NULL{color}|
| |{color:#000000}EXTERNAL            {color}|
| |{color:#000000}bucketing_version   {color}|
| |{color:#000000}engine.hive.enabled{color}|
| |{color:#000000}metadata_location   {color}|
| |{color:#000000}numFiles            {color}|
| |{color:#000000}numRows             {color}|
| |{color:#000000}rawDataSize         {color}|
| |{color:#000000}serialization.format{color}|
| |{color:#000000}storage_handler     {color}|
| |{color:#000000}table_type          {color}|
| |{color:#000000}totalSize           {color}|
| |{color:#000000}transient_lastDdlTime{color}|
| |{color:#000000}uuid                {color}|
| |{color:#000000}write.format.default{color}|
| |{color:#000000}NULL{color}|
|{color:#000000}*# Storage Information*{color}|{color:#000000}NULL{color}|
|{color:#000000}*SerDe Library: *     
{color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergSerDe{color}|
|{color:#000000}*InputFormat: *       
{color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergInputFormat{color}|
|{color:#000000}*OutputFormat:*       
{color}|{color:#000000}org.apache.iceberg.mr.hive.HiveIcebergOutputFormat{color}|
|{color:#000000}*Compressed:*         {color}|{color:#000000}No                 
 {color}|
|{color:#000000}*Sort Columns:*       {color}|{color:#000000}[]                 
 {color}|

However, if we skip the column definition, the query fails:
{code:java}
create table test_meta_copy2 stored by iceberg 
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json');{code}
Error:
{code:java}
INFO  : Compiling 
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19): 
create table test_meta_copy2 stored by iceberg 
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
INFO  : Semantic Analysis Completed (retrial = false)
INFO  : Created Hive schema: Schema(fieldSchemas:null, properties:null)
INFO  : Completed compiling 
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19); Time 
taken: 0.019 seconds
INFO  : Executing 
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19): 
create table test_meta_copy2 stored by iceberg 
tblproperties('metadata_location'='s3a://qe-s3-bucket-weekly-dj5h-dwx-external/clusters/env-dqdj5h/warehouse-1673341391-kkzh/warehouse/tablespace/external/hive/iceberg_test_db_hive.db/test_meta/metadata/00000-7dfd7602-f5e1-4473-97cb-79377d358aa3.metadata.json')
INFO  : Starting task [Stage-0:DDL] in serial mode
ERROR : Failed
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: 
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
provide an existing table or a valid schema)
        at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
 ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
 ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
 ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
        at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
 ~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
        at 
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
 ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
        at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
        at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) 
~[?:?]
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) 
~[?:?]
        at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: java.lang.RuntimeException: 
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
provide an existing table or a valid schema)
        at 
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        ... 28 more
Caused by: org.apache.hadoop.hive.metastore.api.MetaException: 
org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table 
or a valid schema
        at 
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        ... 28 more
ERROR : DDLTask failed, DDL Operation: class 
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: 
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
provide an existing table or a valid schema)
        at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1361) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1366) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.createTableNonReplaceMode(CreateTableOperation.java:158)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation.execute(CreateTableOperation.java:116)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:84) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:547) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:541) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232)
 ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89)
 ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338)
 ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at java.security.AccessController.doPrivileged(Native Method) ~[?:?]
        at javax.security.auth.Subject.doAs(Subject.java:423) ~[?:?]
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
 ~[hadoop-common-3.1.1.7.2.15.4-6.jar:?]
        at 
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358)
 ~[hive-service-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
        at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) ~[?:?]
        at java.util.concurrent.FutureTask.run(FutureTask.java:264) ~[?:?]
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) 
~[?:?]
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) 
~[?:?]
        at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: java.lang.RuntimeException: 
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
provide an existing table or a valid schema)
        at 
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:349)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        ... 28 more
Caused by: org.apache.hadoop.hive.metastore.api.MetaException: 
org.apache.hadoop.hive.serde2.SerDeException Please provide an existing table 
or a valid schema
        at 
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:123)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:80)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:347)
 ~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at 
org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:329) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:1307) 
~[hive-exec-3.1.3000.2022.0.13.0-72.jar:3.1.3000.2022.0.13.0-72]
        ... 28 more
ERROR : FAILED: Execution Error, return code 40000 from 
org.apache.hadoop.hive.ql.ddl.DDLTask. java.lang.RuntimeException: 
MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Please 
provide an existing table or a valid schema)
INFO  : Completed executing 
command(queryId=hive_20230110220019_94ffafef-f531-4532-a07c-0e46e3879f19); Time 
taken: 0.033 seconds
INFO  : OK {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to