This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 76a214cbd15 HIVE-26628: Iceberg table is created when running explain
ctas command (Krisztian Kasa, reviewed by Denys Kuzmenko)
76a214cbd15 is described below
commit 76a214cbd15d322d4806f7ffed246275fd847089
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue Nov 15 18:27:31 2022 +0100
HIVE-26628: Iceberg table is created when running explain ctas command
(Krisztian Kasa, reviewed by Denys Kuzmenko)
---
.../org/apache/hadoop/hive/conf/Constants.java | 2 +
.../apache/iceberg/mr/hive/HiveIcebergSerDe.java | 11 +-
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 61 ++++-
.../positive/ctas_iceberg_partitioned_orc.q | 19 ++
.../positive/ctas_iceberg_partitioned_orc.q.out | 304 +++++++++++++++++++++
.../hive/ql/ddl/table/create/CreateTableDesc.java | 11 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 81 +++---
.../apache/hadoop/hive/ql/plan/PartitionDesc.java | 2 +-
.../org/apache/hadoop/hive/ql/plan/PlanUtils.java | 30 +-
.../org/apache/hadoop/hive/ql/plan/TableDesc.java | 14 +-
10 files changed, 469 insertions(+), 66 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java
b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
index c923c2bcce3..870fefd148b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
@@ -105,4 +105,6 @@ public class Constants {
public static final String HTTP_HEADER_REQUEST_TRACK = "Request-Track";
public static final String TIME_POSTFIX_REQUEST_TRACK = "_TIME";
+
+ public static final String EXPLAIN_CTAS_LOCATION = "explainCtasLocation";
}
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
index f9f01e3b2f0..6a6aa0884f0 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
@@ -28,6 +28,7 @@ import java.util.stream.Collectors;
import java.util.stream.IntStream;
import javax.annotation.Nullable;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
@@ -121,12 +122,14 @@ public class HiveIcebergSerDe extends AbstractSerDe {
this.partitionColumns = ImmutableList.of();
// create table for CTAS
if (e instanceof NoSuchTableException &&
-
Boolean.parseBoolean(serDeProperties.getProperty(hive_metastoreConstants.TABLE_IS_CTAS)))
{
+
Boolean.parseBoolean(serDeProperties.getProperty(hive_metastoreConstants.TABLE_IS_CTAS)))
{
if (!Catalogs.hiveCatalog(configuration, serDeProperties)) {
throw new SerDeException(CTAS_EXCEPTION_MSG);
}
- createTableForCTAS(configuration, serDeProperties);
+ if (!serDeProperties.containsKey(Constants.EXPLAIN_CTAS_LOCATION)) {
+ createTableForCTAS(configuration, serDeProperties);
+ }
}
}
}
@@ -321,4 +324,8 @@ public class HiveIcebergSerDe extends AbstractSerDe {
public boolean shouldStoreFieldsInMetastore(Map<String, String> tableParams)
{
return true;
}
+
+ public Schema getTableSchema() {
+ return tableSchema;
+ }
}
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index ada8eeb2b9e..f96b5e1bfae 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -35,11 +35,13 @@ import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.SnapshotContext;
import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
@@ -90,6 +92,7 @@ import org.apache.iceberg.BaseTable;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.NullOrder;
+import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.RowLevelOperationMode;
import org.apache.iceberg.Schema;
@@ -102,6 +105,7 @@ import org.apache.iceberg.SortField;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hadoop.HadoopConfigurable;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;
@@ -804,31 +808,60 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
@VisibleForTesting
static void overlayTableProperties(Configuration configuration, TableDesc
tableDesc, Map<String, String> map) {
Properties props = tableDesc.getProperties();
- Table table = IcebergTableUtil.getTable(configuration, props);
- String schemaJson = SchemaParser.toJson(table.schema());
Maps.fromProperties(props).entrySet().stream()
.filter(entry -> !map.containsKey(entry.getKey())) // map overrides
tableDesc properties
.forEach(entry -> map.put(entry.getKey(), entry.getValue()));
- map.put(InputFormatConfig.TABLE_IDENTIFIER,
props.getProperty(Catalogs.NAME));
- map.put(InputFormatConfig.TABLE_LOCATION, table.location());
- map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
- props.put(InputFormatConfig.PARTITION_SPEC,
PartitionSpecParser.toJson(table.spec()));
+ String location;
+ Schema schema;
+ PartitionSpec spec;
+ try {
+ Table table = IcebergTableUtil.getTable(configuration, props);
+ location = table.location();
+ schema = table.schema();
+ spec = table.spec();
+
+ // serialize table object into config
+ Table serializableTable = SerializableTable.copyOf(table);
+ checkAndSkipIoConfigSerialization(configuration, serializableTable);
+ map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX +
tableDesc.getTableName(),
+ SerializationUtil.serializeToBase64(serializableTable));
+ } catch (NoSuchTableException ex) {
+ if
(!(StringUtils.isNotBlank(props.getProperty(hive_metastoreConstants.TABLE_IS_CTAS))
&&
+
StringUtils.isNotBlank(props.getProperty(Constants.EXPLAIN_CTAS_LOCATION)))) {
+ throw ex;
+ }
- // serialize table object into config
- Table serializableTable = SerializableTable.copyOf(table);
- checkAndSkipIoConfigSerialization(configuration, serializableTable);
- map.put(InputFormatConfig.SERIALIZED_TABLE_PREFIX +
tableDesc.getTableName(),
- SerializationUtil.serializeToBase64(serializableTable));
+ location = map.get(hive_metastoreConstants.META_TABLE_LOCATION);
+ if (StringUtils.isBlank(location)) {
+ location = props.getProperty(Constants.EXPLAIN_CTAS_LOCATION);
+ }
- // We need to remove this otherwise the job.xml will be invalid as column
comments are separated with '\0' and
- // the serialization utils fail to serialize this character
- map.remove("columns.comments");
+ try {
+ AbstractSerDe serDe = tableDesc.getDeserializer(configuration);
+ HiveIcebergSerDe icebergSerDe = (HiveIcebergSerDe) serDe;
+ schema = icebergSerDe.getTableSchema();
+ spec = IcebergTableUtil.spec(configuration,
icebergSerDe.getTableSchema());
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ map.put(InputFormatConfig.TABLE_IDENTIFIER,
props.getProperty(Catalogs.NAME));
+ if (StringUtils.isNotBlank(location)) {
+ map.put(InputFormatConfig.TABLE_LOCATION, location);
+ }
+ String schemaJson = SchemaParser.toJson(schema);
+ map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
// save schema into table props as well to avoid repeatedly hitting the
HMS during serde initializations
// this is an exception to the interface documentation, but it's a safe
operation to add this property
props.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
+ props.put(InputFormatConfig.PARTITION_SPEC,
PartitionSpecParser.toJson(spec));
+
+ // We need to remove this otherwise the job.xml will be invalid as column
comments are separated with '\0' and
+ // the serialization utils fail to serialize this character
+ map.remove("columns.comments");
}
/**
diff --git
a/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_partitioned_orc.q
b/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_partitioned_orc.q
new file mode 100644
index 00000000000..3897c0e4ca0
--- /dev/null
+++
b/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_partitioned_orc.q
@@ -0,0 +1,19 @@
+set
hive.query.lifetime.hooks=org.apache.iceberg.mr.hive.HiveIcebergQueryLifeTimeHook;
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+set hive.explain.user=false;
+
+create table source(a int, b string, c int);
+
+insert into source values (1, 'one', 3);
+insert into source values (1, 'two', 4);
+
+explain extended
+create external table tbl_ice partitioned by spec (bucket(16, a), truncate(3,
b)) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select a, b, c from source;
+
+create external table tbl_ice partitioned by spec (bucket(16, a), truncate(3,
b)) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select a, b, c from source;
+
+describe formatted tbl_ice;
+
+select * from tbl_ice;
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out
b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out
new file mode 100644
index 00000000000..863d41cc5ae
--- /dev/null
+++
b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out
@@ -0,0 +1,304 @@
+PREHOOK: query: create table source(a int, b string, c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@source
+POSTHOOK: query: create table source(a int, b string, c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@source
+PREHOOK: query: insert into source values (1, 'one', 3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@source
+POSTHOOK: query: insert into source values (1, 'one', 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@source
+POSTHOOK: Lineage: source.a SCRIPT []
+POSTHOOK: Lineage: source.b SCRIPT []
+POSTHOOK: Lineage: source.c SCRIPT []
+PREHOOK: query: insert into source values (1, 'two', 4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@source
+POSTHOOK: query: insert into source values (1, 'two', 4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@source
+POSTHOOK: Lineage: source.a SCRIPT []
+POSTHOOK: Lineage: source.b SCRIPT []
+POSTHOOK: Lineage: source.c SCRIPT []
+PREHOOK: query: explain extended
+create external table tbl_ice partitioned by spec (bucket(16, a), truncate(3,
b)) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select a, b, c from source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain extended
+create external table tbl_ice partitioned by spec (bucket(16, a), truncate(3,
b)) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select a, b, c from source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+OPTIMIZED SQL: SELECT `a`, `b`, `c`
+FROM `default`.`source`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-0, Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: source
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE
Column stats: COMPLETE
+ GatherStats: false
+ Select Operator
+ expressions: a (type: int), b (type: string), c (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 1
+ directory: hdfs://### HDFS PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
+ table:
+ input format:
org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ jobProperties:
+ bucketing_version -1
+ columns a,b,c
+ columns.types int:string:int
+ created_with_ctas true
+ format-version 2
+ iceberg.mr.operation.type.default.tbl_ice OTHER
+ iceberg.mr.table.identifier default.tbl_ice
+ iceberg.mr.table.location hdfs://### HDFS PATH ###
+ iceberg.mr.table.schema
{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"a","required":false,"type":"int"},{"id":2,"name":"b","required":false,"type":"string"},{"id":3,"name":"c","required":false,"type":"int"}]}
+ mapred.output.committer.class
org.apache.iceberg.mr.hive.HiveIcebergStorageHandler$HiveIcebergNoJobCommitter
+ name default.tbl_ice
+ serialization.format 1
+ serialization.lib
org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ storage_handler
org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ write.format.default orc
+ output format:
org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ properties:
+ bucketing_version -1
+ columns a,b,c
+ columns.types int:string:int
+ format-version 2
+ iceberg.mr.operation.type.default.tbl_ice OTHER
+ iceberg.mr.table.partition.spec
{"spec-id":0,"fields":[{"name":"a_bucket","transform":"bucket[16]","source-id":1,"field-id":1000},{"name":"b_trunc","transform":"truncate[3]","source-id":2,"field-id":1001}]}
+ iceberg.mr.table.schema
{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"a","required":false,"type":"int"},{"id":2,"name":"b","required":false,"type":"string"},{"id":3,"name":"c","required":false,"type":"int"}]}
+ name default.tbl_ice
+ serialization.format 1
+ serialization.lib
org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ storage_handler
org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ write.format.default orc
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.tbl_ice
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string),
_col2 (type: int)
+ outputColumnNames: col1, col2, col3
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(col1), max(col1), count(1),
count(col1), compute_bit_vector_hll(col1), max(length(col2)),
avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector_hll(col2),
min(col3), max(col3), count(col3), compute_bit_vector_hll(col3)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 560 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ null sort order:
+ numBuckets: -1
+ sort order:
+ Statistics: Num rows: 1 Data size: 560 Basic stats:
COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col0 (type: int), _col1 (type:
int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5
(type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7
(type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int),
_col11 (type: bigint), _col12 (type: binary)
+ auto parallelism: false
+ Execution mode: vectorized
+ Path -> Alias:
+ hdfs://### HDFS PATH ### [source]
+ Path -> Partition:
+ hdfs://### HDFS PATH ###
+ Partition
+ base file name: source
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,c
+ columns.types int:string:int
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.source
+ serialization.format 1
+ serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,c
+ columns.comments
+ columns.types int:string:int
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.source
+ serialization.format 1
+ serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.source
+ name: default.source
+ Truncated Path -> Alias:
+ /source [source]
+ Reducer 2
+ Execution mode: vectorized
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4),
max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7),
compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10),
count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type:
bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint),
COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary),
'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint),
COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint),
COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary),
'LONG' (type: string), UDFToLong(_col9) (typ [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15,
_col16, _col17
+ Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+ directory: hdfs://### HDFS PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 794 Basic stats:
COMPLETE Column stats: COMPLETE
+ Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns
_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17
+ columns.types
string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:bigint:bigint:bigint:binary
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels
true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Stats Aggregation Key Prefix: hdfs://### HDFS PATH ###
+ Column Stats Desc:
+ Columns: a, b, c
+ Column Types: int, string, int
+ Table: default.tbl_ice
+ Is Table Level Stats: true
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+ source: hdfs://### HDFS PATH ###
+ destination: hdfs://### HDFS PATH ###
+
+PREHOOK: query: create external table tbl_ice partitioned by spec (bucket(16,
a), truncate(3, b)) stored by iceberg stored as orc tblproperties
('format-version'='2') as
+select a, b, c from source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: create external table tbl_ice partitioned by spec (bucket(16,
a), truncate(3, b)) stored by iceberg stored as orc tblproperties
('format-version'='2') as
+select a, b, c from source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PREHOOK: query: describe formatted tbl_ice
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_ice
+POSTHOOK: query: describe formatted tbl_ice
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_ice
+# col_name data_type comment
+a int
+b string
+c int
+
+# Partition Transform Information
+# col_name transform_type
+a BUCKET[16]
+b TRUNCATE[3]
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 2147483647
+#### A masked pattern was here ####
+Table Type: EXTERNAL_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+ EXTERNAL TRUE
+ bucketing_version -1
+ engine.hive.enabled true
+ iceberg.orc.files.only true
+ metadata_location hdfs://### HDFS PATH ###
+ numFiles 2
+ numRows 2
+ previous_metadata_location hdfs://### HDFS PATH ###
+ serialization.format 1
+ storage_handler
org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize 812
+#### A masked pattern was here ####
+ uuid #Masked#
+ write.format.default orc
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+PREHOOK: query: select * from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1 one 3
+1 two 4
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
index eeac3a6d339..75179f38cac 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.ddl.table.create;
import java.io.Serializable;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -73,6 +72,8 @@ import org.apache.hadoop.mapred.OutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.hive.conf.Constants.EXPLAIN_CTAS_LOCATION;
+
/**
* DDL task description for CREATE TABLE commands.
*/
@@ -518,10 +519,10 @@ public class CreateTableDesc implements DDLDesc,
Serializable {
@Explain(displayName = "table properties")
public Map<String, String> getTblPropsExplain() { // only for displaying plan
- HashMap<String, String> copy = new HashMap<>(tblProps);
- copy.remove(hive_metastoreConstants.TABLE_IS_CTAS);
- copy.remove(hive_metastoreConstants.TABLE_BUCKETING_VERSION);
- return copy;
+ return PlanUtils.getPropertiesForExplain(tblProps,
+ EXPLAIN_CTAS_LOCATION,
+ hive_metastoreConstants.TABLE_IS_CTAS,
+ hive_metastoreConstants.TABLE_BUCKETING_VERSION);
}
/**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 4373643aea4..a298e5355b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -21,10 +21,12 @@ package org.apache.hadoop.hive.ql.parse;
import static java.util.Objects.nonNull;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.hadoop.hive.common.AcidConstants.SOFT_DELETE_TABLE;
+import static org.apache.hadoop.hive.conf.Constants.EXPLAIN_CTAS_LOCATION;
import static
org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMICPARTITIONCONVERT;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEARCHIVEENABLED;
import static
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
+import static
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION;
import static
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
import static
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
import static
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DEFAULT_TABLE_TYPE;
@@ -7731,36 +7733,57 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
isDestTempFile = false;
}
- if (tblDesc == null) {
- if (viewDesc != null) {
- tableDescriptor = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
- } else if (qb.getIsQuery()) {
- Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
- String fileFormat = conf.getResultFileFormat().toString();
- if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
- serdeClass = ThriftJDBCBinarySerDe.class;
- fileFormat = ResultFileFormat.SEQUENCEFILE.toString();
- // Set the fetch formatter to be a no-op for the ListSinkOperator,
since we'll
- // write out formatted thrift objects to SequenceFile
- conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER,
NoOpFetchFormatter.class.getName());
- } else if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) {
- // If this output format is Llap, check to see if Arrow is
requested
- boolean useArrow = HiveConf.getBoolVar(conf,
HiveConf.ConfVars.LLAP_OUTPUT_FORMAT_ARROW);
- serdeClass = useArrow ? ArrowColumnarBatchSerDe.class :
LazyBinarySerDe2.class;
- }
- tableDescriptor = PlanUtils.getDefaultQueryOutputTableDesc(cols,
colTypes, fileFormat,
+ try {
+ if (tblDesc == null) {
+ if (viewDesc != null) {
+ destinationTable = viewDesc.toTable(conf);
+ tableDescriptor = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
+ } else if (qb.getIsQuery()) {
+ Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
+ String fileFormat = conf.getResultFileFormat().toString();
+ if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
+ serdeClass = ThriftJDBCBinarySerDe.class;
+ fileFormat = ResultFileFormat.SEQUENCEFILE.toString();
+ // Set the fetch formatter to be a no-op for the
ListSinkOperator, since we'll
+ // write out formatted thrift objects to SequenceFile
+ conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER,
NoOpFetchFormatter.class.getName());
+ } else if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) {
+ // If this output format is Llap, check to see if Arrow is
requested
+ boolean useArrow = HiveConf.getBoolVar(conf,
HiveConf.ConfVars.LLAP_OUTPUT_FORMAT_ARROW);
+ serdeClass = useArrow ? ArrowColumnarBatchSerDe.class :
LazyBinarySerDe2.class;
+ }
+ tableDescriptor = PlanUtils.getDefaultQueryOutputTableDesc(cols,
colTypes, fileFormat,
serdeClass);
+ destinationTable = null;
+ } else {
+ tableDescriptor =
PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
+ }
} else {
- tableDescriptor =
PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
+ destinationTable =
db.getTranslateTableDryrun(tblDesc.toTable(conf).getTTable());
+ if (ctx.isExplainPlan() &&
+ tblDesc.getTblProps().containsKey(TABLE_IS_CTAS) &&
+ !tblDesc.getTblProps().containsKey(META_TABLE_LOCATION)) {
+ if (destinationTable.getDataLocation() == null) {
+ // no metastore.metadata.transformer.class was set
+ tblDesc.getTblProps().put(EXPLAIN_CTAS_LOCATION, new
Warehouse(conf).getDefaultTablePath(
+ destinationTable.getDbName(),
+ destinationTable.getTableName(),
+
Boolean.parseBoolean(destinationTable.getParameters().get("EXTERNAL"))).toString());
+ } else {
+ tblDesc.getTblProps().put(EXPLAIN_CTAS_LOCATION,
destinationTable.getDataLocation().toString());
+ }
+ }
+ tableDescriptor = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
}
- } else {
- tableDescriptor = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
+ } catch (HiveException | MetaException e) {
+ throw new SemanticException(e);
}
+
// if available, set location in table desc properties
if (tblDesc != null && tblDesc.getLocation() != null && tableDescriptor
!= null &&
-
!tableDescriptor.getProperties().containsKey(hive_metastoreConstants.META_TABLE_LOCATION))
{
-
tableDescriptor.getProperties().setProperty(hive_metastoreConstants.META_TABLE_LOCATION,
tblDesc.getLocation());
+ !tableDescriptor.getProperties().containsKey(META_TABLE_LOCATION)) {
+ tableDescriptor.getProperties().setProperty(META_TABLE_LOCATION,
tblDesc.getLocation());
}
// We need a specific rowObjectInspector in this case
@@ -7773,13 +7796,6 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
boolean isDfsDir = (destType == QBMetaData.DEST_DFS_FILE);
- try {
- destinationTable = tblDesc != null ?
db.getTranslateTableDryrun(tblDesc.toTable(conf).getTTable()) :
- viewDesc != null ? viewDesc.toTable(conf) : null;
- } catch (HiveException e) {
- throw new SemanticException(e);
- }
-
destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
// Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized
view
@@ -13884,7 +13900,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
throw new SemanticException(e);
}
}
- if (!SessionStateUtil.addResource(conf,
hive_metastoreConstants.META_TABLE_LOCATION, tblLocation)) {
+ if (!SessionStateUtil.addResource(conf, META_TABLE_LOCATION,
tblLocation)) {
throw new SemanticException(
"Query state attached to Session state must be not null. Table
location cannot be saved.");
}
@@ -14009,6 +14025,9 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
isTransactional, isManaged, new String[]{qualifiedTabName.getDb(),
qualifiedTabName.getTable()}, isDefaultTableTypeChanged);
isExt = isExternalTableChanged(tblProps, isTransactional, isExt,
isDefaultTableTypeChanged);
tblProps.put(TABLE_IS_CTAS, "true");
+ if (ctx.isExplainPlan()) {
+ tblProps.put(EXPLAIN_CTAS_LOCATION, "");
+ }
addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(),
qualifiedTabName.getTable()},
TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat);
tableDesc = new CreateTableDesc(qualifiedTabName, isExt, isTemporary,
cols,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index 875a093abb0..5358e100660 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -213,7 +213,7 @@ public class PartitionDesc implements Serializable,
Cloneable {
@Explain(displayName = "properties", explainLevels = { Level.EXTENDED })
public Map getPropertiesExplain() {
- return PlanUtils.getPropertiesExplain(getProperties());
+ return PlanUtils.getPropertiesForExplain(getProperties());
}
public void setProperties(final Properties properties) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index 36f75afcb77..80bab489620 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.plan;
+import static org.apache.hadoop.hive.conf.Constants.EXPLAIN_CTAS_LOCATION;
import static
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
import static org.apache.hive.common.util.HiveStringUtils.quoteComments;
@@ -26,14 +27,12 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
-import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
-import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.JavaUtils;
@@ -1223,11 +1222,13 @@ public final class PlanUtils {
return LazySimpleSerDe.class;
}
+ private static final String[] FILTER_OUT_FROM_EXPLAIN = {TABLE_IS_CTAS,
EXPLAIN_CTAS_LOCATION};
+
/**
* Get a Map of table or partition properties to be used in explain extended output.
* Do some filtering to make output readable and/or concise.
*/
- static Map getPropertiesExplain(Properties properties) {
+ static Map<Object, Object> getPropertiesForExplain(Properties properties) {
if (properties != null) {
Map<Object, Object> clone = null;
String value = properties.getProperty("columns.comments");
@@ -1244,11 +1245,13 @@ public final class PlanUtils {
}
clone.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES);
}
- if (properties.containsKey(TABLE_IS_CTAS)) {
- if (clone == null) {
- clone = new HashMap<>(properties);
+ for (String key : FILTER_OUT_FROM_EXPLAIN) {
+ if (properties.containsKey(key)) {
+ if (clone == null) {
+ clone = new HashMap<>(properties);
+ }
+ clone.remove(key);
}
- clone.remove(TABLE_IS_CTAS);
}
if (clone != null) {
return clone;
@@ -1256,4 +1259,17 @@ public final class PlanUtils {
}
return properties;
}
+
+ public static Map<String, String> getPropertiesForExplain(Map<String,
String> properties, String... propertiesToRemove) {
+ if (properties == null) {
+ return Collections.emptyMap();
+ }
+
+ Map<String, String> clone = new HashMap<>(properties);
+ for (String key : propertiesToRemove) {
+ clone.remove(key);
+ }
+
+ return clone;
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
index 4ead854c1a9..12366eff289 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
@@ -26,14 +26,10 @@ import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
-import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hive.common.util.ReflectionUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Enumeration;
@@ -41,6 +37,8 @@ import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Properties;
+import static org.apache.hadoop.hive.conf.Constants.EXPLAIN_CTAS_LOCATION;
+
/**
* TableDesc.
*
@@ -129,7 +127,7 @@ public class TableDesc implements Serializable, Cloneable {
@Explain(displayName = "properties", explainLevels = { Level.EXTENDED })
public Map getPropertiesExplain() {
- return PlanUtils.getPropertiesExplain(getProperties());
+ return PlanUtils.getPropertiesForExplain(getProperties());
}
public void setProperties(final Properties properties) {
@@ -141,11 +139,15 @@ public class TableDesc implements Serializable, Cloneable {
this.jobProperties = jobProperties;
}
- @Explain(displayName = "jobProperties", explainLevels = { Level.EXTENDED })
public Map<String, String> getJobProperties() {
return jobProperties;
}
+ @Explain(displayName = "jobProperties", explainLevels = { Level.EXTENDED })
+ public Map<String, String> getJobPropertiesExplain() {
+ return PlanUtils.getPropertiesForExplain(jobProperties,
EXPLAIN_CTAS_LOCATION);
+ }
+
public void setJobSecrets(Map<String, String> jobSecrets) {
this.jobSecrets = jobSecrets;
}