Repository: hive
Updated Branches:
  refs/heads/master 7fb4b1fed -> 2ed47838d
HIVE-12643 : For self describing InputFormat don't replicate schema information in partitions (Ashutosh Chauhan via Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ed47838
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ed47838
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ed47838
Branch: refs/heads/master
Commit: 2ed47838dc6cfee3fb6f4470427e50a4495b2eba
Parents: 7fb4b1f
Author: Ashutosh Chauhan <[email protected]>
Authored: Wed Dec 9 17:26:00 2015 -0800
Committer: Ashutosh Chauhan <[email protected]>
Committed: Mon May 23 16:51:12 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/MetaStoreUtils.java | 68 ++++++++++++--------
.../apache/hadoop/hive/ql/exec/Utilities.java | 2 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 6 --
.../hive/ql/optimizer/physical/Vectorizer.java | 6 +-
.../hadoop/hive/ql/plan/PartitionDesc.java | 14 +++-
.../clientpositive/quotedid_tblproperty.q.out | 4 +-
.../tez/vector_partition_diff_num_cols.q.out | 2 +
.../vector_partition_diff_num_cols.q.out | 2 +
8 files changed, 63 insertions(+), 41 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 6bc882a..84b24ab 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -1012,8 +1012,38 @@ public class MetaStoreUtils { return schema; } - public static Properties getSchema( - org.apache.hadoop.hive.metastore.api.StorageDescriptor sd, + public 
static Properties addCols(Properties schema, List<FieldSchema> cols) { + + StringBuilder colNameBuf = new StringBuilder(); + StringBuilder colTypeBuf = new StringBuilder(); + StringBuilder colComment = new StringBuilder(); + + boolean first = true; + for (FieldSchema col : cols) { + if (!first) { + colNameBuf.append(","); + colTypeBuf.append(":"); + colComment.append('\0'); + } + colNameBuf.append(col.getName()); + colTypeBuf.append(col.getType()); + colComment.append((null != col.getComment()) ? col.getComment() : ""); + first = false; + } + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS, + colNameBuf.toString()); + String colTypes = colTypeBuf.toString(); + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES, + colTypes); + schema.setProperty("columns.comments", colComment.toString()); + + return schema; + + } + + public static Properties getSchemaWithoutCols(org.apache.hadoop.hive.metastore.api.StorageDescriptor sd, org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd, Map<String, String> parameters, String databaseName, String tableName, List<FieldSchema> partitionKeys) { @@ -1063,30 +1093,7 @@ public class MetaStoreUtils { .getSerdeInfo().getSerializationLib()); } } - StringBuilder colNameBuf = new StringBuilder(); - StringBuilder colTypeBuf = new StringBuilder(); - StringBuilder colComment = new StringBuilder(); - boolean first = true; - for (FieldSchema col : tblsd.getCols()) { - if (!first) { - colNameBuf.append(","); - colTypeBuf.append(":"); - colComment.append('\0'); - } - colNameBuf.append(col.getName()); - colTypeBuf.append(col.getType()); - colComment.append((null != col.getComment()) ? 
col.getComment() : ""); - first = false; - } - String colNames = colNameBuf.toString(); - String colTypes = colTypeBuf.toString(); - schema.setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS, - colNames); - schema.setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES, - colTypes); - schema.setProperty("columns.comments", colComment.toString()); + if (sd.getCols() != null) { schema.setProperty( org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_DDL, @@ -1130,6 +1137,15 @@ public class MetaStoreUtils { return schema; } + public static Properties getSchema( + org.apache.hadoop.hive.metastore.api.StorageDescriptor sd, + org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd, + Map<String, String> parameters, String databaseName, String tableName, + List<FieldSchema> partitionKeys) { + + return addCols(getSchemaWithoutCols(sd, tblsd, parameters, databaseName, tableName, partitionKeys), tblsd.getCols()); + } + /** * Convert FieldSchemas to columnNames. 
*/ http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 2ab9ed2..8144c3b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -732,7 +732,7 @@ public final class Utilities { } public static PartitionDesc getPartitionDesc(Partition part) throws HiveException { - return (new PartitionDesc(part)); + return new PartitionDesc(part); } public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part, http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 812af9a..7595065 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -527,9 +527,6 @@ public final class GenMapRedUtils { Map<String, String> props = tsOp.getConf().getOpProps(); if (props != null) { Properties target = aliasPartnDesc.getProperties(); - if (target == null) { - aliasPartnDesc.setProperties(target = new Properties()); - } target.putAll(props); } @@ -668,9 +665,6 @@ public final class GenMapRedUtils { if (props != null) { Properties target = tblDesc.getProperties(); - if (target == null) { - tblDesc.setProperties(target = new Properties()); - } target.putAll(props); } http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 51e7a17..c1d6582 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1147,7 +1147,7 @@ public class Vectorizer implements PhysicalPlanResolver { class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor { private final MapWork mWork; - private VectorTaskColumnInfo vectorTaskColumnInfo; + private final VectorTaskColumnInfo vectorTaskColumnInfo; private final boolean isTez; public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez, @@ -1205,9 +1205,9 @@ public class Vectorizer implements PhysicalPlanResolver { class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor { - private VectorTaskColumnInfo vectorTaskColumnInfo; + private final VectorTaskColumnInfo vectorTaskColumnInfo; - private boolean isTez; + private final boolean isTez; private Operator<? 
extends OperatorDesc> rootVectorOp; http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 4d627ef..fe09bdf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -29,6 +29,7 @@ import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; @@ -85,10 +86,17 @@ public class PartitionDesc implements Serializable, Cloneable { public PartitionDesc(final Partition part) throws HiveException { PartitionDescConstructorHelper(part, getTableDesc(part.getTable()), true); - setProperties(part.getMetadataFromPartitionSchema()); + if(Utilities.isInputFileFormatSelfDescribing(this)) { + // if IF is self describing no need to send column info per partition, since its not used anyway. + Table tbl = part.getTable(); + setProperties(MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(), part.getTPartition().getSd(), + part.getParameters(), tbl.getDbName(), tbl.getTableName(), tbl.getPartitionKeys())); + } else { + setProperties(part.getMetadataFromPartitionSchema()); + } } - /** + /** * @param part Partition * @param tblDesc Table Descriptor * @param usePartSchemaProperties Use Partition Schema Properties to set the @@ -190,7 +198,7 @@ public class PartitionDesc implements Serializable, Cloneable { Class<? extends OutputFormat> outputClass = outputFileFormatClass == null ? 
null : HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass); if (outputClass != null) { - this.outputFileFormatClass = (Class<? extends HiveOutputFormat>) + this.outputFileFormatClass = (Class<? extends HiveOutputFormat>) CLASS_INTERNER.intern(outputClass); } else { this.outputFileFormatClass = outputClass; http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out b/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out index ca1dbe6..3204c7d 100644 --- a/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out +++ b/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out @@ -16,5 +16,5 @@ PREHOOK: Input: default@xyz POSTHOOK: query: describe xyz POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@xyz -valid_colname string -invalid.colname string +key string +value string http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out index f23a359..9b75892 100644 --- a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out @@ -368,6 +368,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -477,6 +478,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: 
bigint) + Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out index ef92b89..b224da8 100644 --- a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out +++ b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out @@ -346,6 +346,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -447,6 +448,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0)
