Author: jitendra
Date: Tue Mar 18 18:42:51 2014
New Revision: 1579010
URL: http://svn.apache.org/r1579010
Log:
HIVE-6639. Vectorization: Partition column names are not picked up. (reviewed by Vikram)
Modified:
hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q
hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out
Modified:
hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
---
hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java
(original)
+++
hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java
Tue Mar 18 18:42:51 2014
@@ -852,7 +852,12 @@ public class TestDecimal128 {
Decimal128 d12 = new Decimal128(27.000, (short)3);
HiveDecimal hd7 = HiveDecimal.create(new BigDecimal("27.000"));
assertEquals(hd7.toString(), d12.getHiveDecimalString());
- assertEquals("27", hd7.toString());
+ assertEquals("27", d12.getHiveDecimalString());
+
+ Decimal128 d13 = new Decimal128(1234123000, (short)3);
+ HiveDecimal hd8 = HiveDecimal.create(new BigDecimal("1234123000"));
+ assertEquals(hd8.toString(), d13.getHiveDecimalString());
+ assertEquals("1234123000", d13.getHiveDecimalString());
}
@Test
Modified:
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
---
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
(original)
+++
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
Tue Mar 18 18:42:51 2014
@@ -53,6 +53,8 @@ public class CastDecimalToTimestamp exte
@Override
protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) {
tmp.update(inV.vector[i]);
+
+ // Reduce scale at most by 9, therefore multiplication will not require rounding.
int newScale = inV.scale > 9 ? (inV.scale - 9) : 0;
tmp.multiplyDestructive(tenE9, (short) newScale);
Modified:
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
---
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
(original)
+++
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Tue Mar 18 18:42:51 2014
@@ -55,6 +55,7 @@ import org.apache.hadoop.hive.ql.lib.Tas
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -421,7 +422,7 @@ public class Vectorizer implements Physi
VectorizationContext vContext = null;
if (op instanceof TableScanOperator) {
- vContext = getVectorizationContext(op, physicalContext);
+ vContext = getVectorizationContext((TableScanOperator) op,
physicalContext);
for (String onefile : mWork.getPathToAliases().keySet()) {
List<String> aliases = mWork.getPathToAliases().get(onefile);
for (String alias : aliases) {
@@ -719,27 +720,20 @@ public class Vectorizer implements Physi
return supportedDataTypesPattern.matcher(type.toLowerCase()).matches();
}
- private VectorizationContext getVectorizationContext(Operator<? extends
OperatorDesc> op,
+ private VectorizationContext getVectorizationContext(TableScanOperator op,
PhysicalContext pctx) {
RowSchema rs = op.getSchema();
Map<String, Integer> cmap = new HashMap<String, Integer>();
int columnCount = 0;
for (ColumnInfo c : rs.getSignature()) {
- if (!c.getIsVirtualCol()) {
+ if (!isVirtualColumn(c)) {
cmap.put(c.getInternalName(), columnCount++);
}
}
- PrunedPartitionList partList =
pctx.getParseContext().getOpToPartList().get(op);
- if (partList != null) {
- Table tab = partList.getSourceTable();
- if (tab.getPartitionKeys() != null) {
- for (FieldSchema fs : tab.getPartitionKeys()) {
- cmap.put(fs.getName(), columnCount++);
- }
- }
- }
- return new VectorizationContext(cmap, columnCount);
+
+ VectorizationContext vc = new VectorizationContext(cmap, columnCount);
+ return vc;
}
Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends
OperatorDesc> op,
@@ -778,4 +772,16 @@ public class Vectorizer implements Physi
}
return vectorOp;
}
+
+ private boolean isVirtualColumn(ColumnInfo column) {
+
+ // Not using method column.getIsVirtualCol() because partitioning columns are also
+ // treated as virtual columns in ColumnInfo.
+ for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) {
+ if (column.getInternalName().equals(vc.getName())) {
+ return true;
+ }
+ }
+ return false;
+ }
}
Modified:
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
---
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q
(original)
+++
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q
Tue Mar 18 18:42:51 2014
@@ -4,3 +4,4 @@ insert overwrite table alltypesorc_part
insert overwrite table alltypesorc_part partition (ds='2012') select * from
alltypesorc limit 100;
select count(cdouble), cint from alltypesorc_part where ds='2011' group by
cint limit 10;
+select count(*) from alltypesorc_part A join alltypesorc_part B on A.ds=B.ds;
Modified:
hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out
URL:
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
---
hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out
(original)
+++
hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out
Tue Mar 18 18:42:51 2014
@@ -92,3 +92,40 @@ POSTHOOK: Lineage: alltypesorc_part PART
POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp,
comment:from deserializer), ]
POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from
deserializer), ]
100 528534767
+PREHOOK: query: select count(*) from alltypesorc_part A join alltypesorc_part
B on A.ds=B.ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_part
+PREHOOK: Input: default@alltypesorc_part@ds=2011
+PREHOOK: Input: default@alltypesorc_part@ds=2012
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from alltypesorc_part A join alltypesorc_part
B on A.ds=B.ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_part
+POSTHOOK: Input: default@alltypesorc_part@ds=2011
+POSTHOOK: Input: default@alltypesorc_part@ds=2012
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cbigint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cboolean1 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cboolean2 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cdouble SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cfloat SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).csmallint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cstring1 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cstring2 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).ctimestamp1 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).ctimestamp2 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).ctinyint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cbigint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cboolean1 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cboolean2 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cdouble SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cfloat SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).csmallint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring1 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:from
deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp,
comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE
[(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from
deserializer), ]
+20000