HIVE-14542 - VirtualColumn::equals() should use object equality (Eugene Koifman, reviewed by Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0755348d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0755348d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0755348d Branch: refs/heads/hive-14535 Commit: 0755348d781ab96a260db2a4fc6a4eabb8986d73 Parents: 943a361 Author: Eugene Koifman <[email protected]> Authored: Thu Sep 8 17:45:26 2016 -0700 Committer: Eugene Koifman <[email protected]> Committed: Thu Sep 8 17:45:26 2016 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/exec/MapOperator.java | 110 ++++++++++--------- .../hadoop/hive/ql/metadata/VirtualColumn.java | 41 ++----- 2 files changed, 67 insertions(+), 84 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0755348d/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index f3eed75..4bdd3c9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -542,61 +542,67 @@ public class MapOperator extends AbstractMapOperator { vcValues = new Object[vcs.size()]; } for (int i = 0; i < vcs.size(); i++) { - VirtualColumn vc = vcs.get(i); - if (vc.equals(VirtualColumn.FILENAME)) { - if (ctx.inputFileChanged()) { - vcValues[i] = new Text(ctx.getCurrentInputPath().toString()); - } - } else if (vc.equals(VirtualColumn.BLOCKOFFSET)) { - long current = ctx.getIoCxt().getCurrentBlockStart(); - LongWritable old = (LongWritable) vcValues[i]; - if (old == null) { - old = new LongWritable(current); - vcValues[i] = old; - continue; - } - if (current != old.get()) { - old.set(current); - } - } else if (vc.equals(VirtualColumn.ROWOFFSET)) { - long current = ctx.getIoCxt().getCurrentRow(); - LongWritable old = (LongWritable) vcValues[i]; - if (old == null) { - old = new LongWritable(current); - vcValues[i] = old; - continue; - } - if (current != old.get()) { - old.set(current); - } - } else if (vc.equals(VirtualColumn.RAWDATASIZE)) { - long current = 0L; - SerDeStats stats = deserializer.getSerDeStats(); - if(stats != null) { - current = stats.getRawDataSize(); - } - LongWritable old = (LongWritable) vcValues[i]; - if (old == null) { - old = new LongWritable(current); - vcValues[i] = old; - continue; - } - if (current != old.get()) { - old.set(current); - } - } - else if(vc.equals(VirtualColumn.ROWID)) { - if(ctx.getIoCxt().getRecordIdentifier() == null) { - vcValues[i] = null; + switch(vcs.get(i)) { + case FILENAME : + if (ctx.inputFileChanged()) { + vcValues[i] = new Text(ctx.getCurrentInputPath().toString()); + } + break; + case BLOCKOFFSET: { + long current = ctx.getIoCxt().getCurrentBlockStart(); + LongWritable old = (LongWritable) vcValues[i]; + if (old == null) { + old = new LongWritable(current); + vcValues[i] = old; + continue; + } + if (current != old.get()) { + old.set(current); + } } - else { - if(vcValues[i] == null) { - vcValues[i] = new Object[RecordIdentifier.Field.values().length]; + break; + case ROWOFFSET: { + long current = ctx.getIoCxt().getCurrentRow(); + LongWritable old = (LongWritable) vcValues[i]; + if (old == null) { + old = new LongWritable(current); + vcValues[i] = old; + continue; + } + if (current != old.get()) { + old.set(current); } - RecordIdentifier.StructInfo.toArray(ctx.getIoCxt().getRecordIdentifier(), (Object[])vcValues[i]); - ctx.getIoCxt().setRecordIdentifier(null);//so we don't accidentally cache the value; shouldn't - //happen since IO layer either knows how to produce ROW__ID or not - but to be safe } + break; + case RAWDATASIZE: + long current = 0L; + SerDeStats stats = deserializer.getSerDeStats(); + if(stats != null) { + current = stats.getRawDataSize(); + } + LongWritable old = (LongWritable) vcValues[i]; + if (old == null) { + old = new LongWritable(current); + vcValues[i] = old; + continue; + } + if (current != old.get()) { + old.set(current); + } + break; + case ROWID: + if(ctx.getIoCxt().getRecordIdentifier() == null) { + vcValues[i] = null; + } + else { + if(vcValues[i] == null) { + vcValues[i] = new Object[RecordIdentifier.Field.values().length]; + } + RecordIdentifier.StructInfo.toArray(ctx.getIoCxt().getRecordIdentifier(), (Object[])vcValues[i]); + ctx.getIoCxt().setRecordIdentifier(null);//so we don't accidentally cache the value; shouldn't + //happen since IO layer either knows how to produce ROW__ID or not - but to be safe + } + break; } } return vcValues; http://git-wip-us.apache.org/repos/asf/hive/blob/0755348d/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java index 3549143..044d64c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.metadata; -import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -41,19 +40,16 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @InterfaceAudience.Private -public class VirtualColumn implements Serializable { +public enum VirtualColumn { + FILENAME("INPUT__FILE__NAME", TypeInfoFactory.stringTypeInfo), + BLOCKOFFSET("BLOCK__OFFSET__INSIDE__FILE", TypeInfoFactory.longTypeInfo), + ROWOFFSET("ROW__OFFSET__INSIDE__BLOCK", TypeInfoFactory.longTypeInfo), - private static final long serialVersionUID = 1L; - - public static final VirtualColumn FILENAME = new VirtualColumn("INPUT__FILE__NAME", (PrimitiveTypeInfo)TypeInfoFactory.stringTypeInfo); - public static final VirtualColumn BLOCKOFFSET = new VirtualColumn("BLOCK__OFFSET__INSIDE__FILE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); - public static final VirtualColumn ROWOFFSET = new VirtualColumn("ROW__OFFSET__INSIDE__BLOCK", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); - - public static final VirtualColumn RAWDATASIZE = new VirtualColumn("RAW__DATA__SIZE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); + RAWDATASIZE("RAW__DATA__SIZE", TypeInfoFactory.longTypeInfo), /** * {@link org.apache.hadoop.hive.ql.io.RecordIdentifier} */ - public static final VirtualColumn ROWID = new VirtualColumn("ROW__ID", RecordIdentifier.StructInfo.typeInfo, true, RecordIdentifier.StructInfo.oi); + ROWID("ROW__ID", RecordIdentifier.StructInfo.typeInfo, true, RecordIdentifier.StructInfo.oi), /** * GROUPINGID is used with GROUP BY GROUPINGS SETS, ROLLUP and CUBE. @@ -62,8 +58,7 @@ public class VirtualColumn implements Serializable { * set if that column has been aggregated in that row. Otherwise the * value is "0". Returns the decimal representation of the bit vector. */ - public static final VirtualColumn GROUPINGID = - new VirtualColumn("GROUPING__ID", (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo); + GROUPINGID("GROUPING__ID", TypeInfoFactory.intTypeInfo); public static ImmutableSet<String> VIRTUAL_COLUMN_NAMES = ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(), @@ -74,12 +69,12 @@ public class VirtualColumn implements Serializable { private final boolean isHidden; private final ObjectInspector oi; - private VirtualColumn(String name, PrimitiveTypeInfo typeInfo) { + VirtualColumn(String name, PrimitiveTypeInfo typeInfo) { this(name, typeInfo, true, PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo)); } - private VirtualColumn(String name, TypeInfo typeInfo, boolean isHidden, ObjectInspector oi) { + VirtualColumn(String name, TypeInfo typeInfo, boolean isHidden, ObjectInspector oi) { this.name = name; this.typeInfo = typeInfo; this.isHidden = isHidden; @@ -124,24 +119,6 @@ public class VirtualColumn implements Serializable { return oi; } - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if(!(o instanceof VirtualColumn)) { - return false; - } - VirtualColumn c = (VirtualColumn) o; - return this.name.equals(c.name) - && this.typeInfo.getTypeName().equals(c.getTypeInfo().getTypeName()); - } - @Override - public int hashCode() { - int c = 19; - c = 31 * name.hashCode() + c; - return 31 * typeInfo.getTypeName().hashCode() + c; - } public static Collection<String> removeVirtualColumns(final Collection<String> columns) { Iterables.removeAll(columns, VIRTUAL_COLUMN_NAMES); return columns;
