Repository: hive Updated Branches: refs/heads/master c2dc452ec -> 8dcead4ea
HIVE-11196 : Utilities.getPartitionDesc() should try to reuse TableDesc object (Hari Subramaniyan, reviewed by Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8dcead4e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8dcead4e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8dcead4e Branch: refs/heads/master Commit: 8dcead4ea1c1c6bb08eace1d8969694f8bff6ac7 Parents: c2dc452 Author: Hari Subramaniyan <harisan...@apache.org> Authored: Mon Jul 27 13:46:19 2015 -0700 Committer: Hari Subramaniyan <harisan...@apache.org> Committed: Mon Jul 27 13:46:19 2015 -0700 ---------------------------------------------------------------------- .../mapreduce/TestHCatMultiOutputFormat.java | 6 ++- .../apache/hadoop/hive/ql/exec/Utilities.java | 9 +++-- .../hive/ql/optimizer/GenMapRedUtils.java | 18 ++++++--- .../hive/ql/optimizer/SimpleFetchOptimizer.java | 5 ++- .../hadoop/hive/ql/plan/PartitionDesc.java | 39 ++++++++++++++++---- 5 files changed, 56 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/8dcead4e/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java ---------------------------------------------------------------------- diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java index 049de54..8148faa 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.plan.FetchWork; import 
org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; @@ -379,11 +380,12 @@ public class TestHCatMultiOutputFormat { List<Partition> partitions = hive.getPartitions(tbl); List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>(); List<Path> partLocs = new ArrayList<Path>(); + TableDesc tableDesc = Utilities.getTableDesc(tbl); for (Partition part : partitions) { partLocs.add(part.getDataLocation()); - partDesc.add(Utilities.getPartitionDesc(part)); + partDesc.add(Utilities.getPartitionDescFromTableDesc(tableDesc, part, true)); } - work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl)); + work = new FetchWork(partLocs, partDesc, tableDesc); work.setLimit(100); } else { work = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl)); http://git-wip-us.apache.org/repos/asf/hive/blob/8dcead4e/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index d8e463d..d578f11 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec; +import static com.google.common.base.Preconditions.checkNotNull; + import java.beans.DefaultPersistenceDelegate; import java.beans.Encoder; import java.beans.ExceptionListener; @@ -80,6 +82,7 @@ import java.util.zip.DeflaterOutputStream; import java.util.zip.InflaterInputStream; import org.antlr.runtime.CommonToken; +import org.apache.calcite.util.ChunkList; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.WordUtils; @@ -1230,9 +1233,9 @@ 
public final class Utilities { return (new PartitionDesc(part)); } - public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part) - throws HiveException { - return new PartitionDesc(part, tblDesc); + public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part, + boolean usePartSchemaProperties) throws HiveException { + return new PartitionDesc(part, tblDesc, usePartSchemaProperties); } private static String getOpTreeSkel_helper(Operator<?> op, String indent) { http://git-wip-us.apache.org/repos/asf/hive/blob/8dcead4e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 29854d8..693d8c7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -487,8 +487,6 @@ public final class GenMapRedUtils { ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>(); Path tblDir = null; - TableDesc tblDesc = null; - plan.setNameToSplitSample(parseCtx.getNameToSplitSample()); if (partsList == null) { @@ -575,6 +573,8 @@ public final class GenMapRedUtils { //This read entity is a direct read entity and not an indirect read (that is when // this is being read because it is a dependency of a view). boolean isDirectRead = (parentViewInfo == null); + TableDesc tblDesc = null; + boolean initTableDesc = false; for (Partition part : parts) { if (part.getTable().isPartitioned()) { @@ -647,12 +647,18 @@ public final class GenMapRedUtils { // is it a partitioned table ? 
if (!part.getTable().isPartitioned()) { - assert ((tblDir == null) && (tblDesc == null)); + assert (tblDir == null); tblDir = paths[0]; - tblDesc = Utilities.getTableDesc(part.getTable()); + if (!initTableDesc) { + tblDesc = Utilities.getTableDesc(part.getTable()); + initTableDesc = true; + } } else if (tblDesc == null) { - tblDesc = Utilities.getTableDesc(part.getTable()); + if (!initTableDesc) { + tblDesc = Utilities.getTableDesc(part.getTable()); + initTableDesc = true; + } } if (props != null) { @@ -678,7 +684,7 @@ public final class GenMapRedUtils { partDesc.add(Utilities.getPartitionDesc(part)); } else { - partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part)); + partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part, false)); } } catch (HiveException e) { LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); http://git-wip-us.apache.org/repos/asf/hive/blob/8dcead4e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 317454d..3859177 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -369,9 +369,10 @@ public class SimpleFetchOptimizer implements Transform { private FetchWork convertToWork() throws HiveException { inputs.clear(); + TableDesc tableDesc = Utilities.getTableDesc(table); if (!table.isPartitioned()) { inputs.add(new ReadEntity(table, parent, !table.isView() && parent == null)); - FetchWork work = new FetchWork(table.getPath(), Utilities.getTableDesc(table)); + FetchWork work = new FetchWork(table.getPath(), tableDesc); PlanUtils.configureInputJobPropertiesForStorageHandler(work.getTblDesc()); work.setSplitSample(splitSample); return 
work; @@ -382,7 +383,7 @@ public class SimpleFetchOptimizer implements Transform { for (Partition partition : partsList.getNotDeniedPartns()) { inputs.add(new ReadEntity(partition, parent, parent == null)); listP.add(partition.getDataLocation()); - partP.add(Utilities.getPartitionDesc(partition)); + partP.add(Utilities.getPartitionDescFromTableDesc(tableDesc, partition, true)); } Table sourceTable = partsList.getSourceTable(); inputs.add(new ReadEntity(sourceTable, parent, parent == null)); http://git-wip-us.apache.org/repos/asf/hive/blob/8dcead4e/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index b123511..864301c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -81,18 +81,41 @@ public class PartitionDesc implements Serializable, Cloneable { } public PartitionDesc(final Partition part) throws HiveException { - this.tableDesc = Utilities.getTableDesc(part.getTable()); + PartitionDescConstructorHelper(part, Utilities.getTableDesc(part.getTable()), true); setProperties(part.getMetadataFromPartitionSchema()); - partSpec = part.getSpec(); - setInputFileFormatClass(part.getInputFormatClass()); - setOutputFileFormatClass(part.getOutputFormatClass()); } - public PartitionDesc(final Partition part,final TableDesc tblDesc) throws HiveException { + /** + * @param part Partition + * @param tblDesc Table Descriptor + * @param usePartSchemaProperties Use Partition Schema Properties to set the + * partition descriptor properties. This is usually set to true by the caller + * if the table is partitioned, i.e. if the table has partition columns. 
+ * @throws HiveException + */ + public PartitionDesc(final Partition part,final TableDesc tblDesc, + boolean usePartSchemaProperties) + throws HiveException { + PartitionDescConstructorHelper(part,tblDesc, usePartSchemaProperties); + //We use partition schema properties to set the partition descriptor properties + // if usePartSchemaProperties is set to true. + if (usePartSchemaProperties) { + setProperties(part.getMetadataFromPartitionSchema()); + } else { + // each partition maintains a large properties + setProperties(part.getSchemaFromTableSchema(tblDesc.getProperties())); + } + } + + private void PartitionDescConstructorHelper(final Partition part,final TableDesc tblDesc, boolean setInputFileFormat) + throws HiveException { this.tableDesc = tblDesc; - setProperties(part.getSchemaFromTableSchema(tblDesc.getProperties())); // each partition maintains a large properties - partSpec = part.getSpec(); - setOutputFileFormatClass(part.getInputFormatClass()); + this.partSpec = part.getSpec(); + if (setInputFileFormat) { + setInputFileFormatClass(part.getInputFormatClass()); + } else { + setOutputFileFormatClass(part.getInputFormatClass()); + } setOutputFileFormatClass(part.getOutputFormatClass()); }